string escapes

2024-03-19 20:39:55 -04:00
parent be81b9a6d6
commit e320cd1e68
2 changed files with 83 additions and 4 deletions
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,6 +1,7 @@
 package lexer

 import (
+	"encoding/hex"
 	"monkey/token"
 	"strings"
 )
@@ -119,7 +120,13 @@ func (l *Lexer) NextToken() token.Token {
 		tok.Type = token.EOF
 	case '"':
 		tok.Type = token.STRING
-		tok.Literal = l.readString()
+		str, err := l.readString()
+		if err != nil {
+			tok = newToken(token.ILLEGAL, l.prevCh)
+		} else {
+			tok.Type = token.STRING
+			tok.Literal = str
+		}
 	case '[':
 		tok = newToken(token.LBRACKET, l.ch)
 	case ']':
@@ -181,12 +188,39 @@ func (l *Lexer) skipWhitespace() {
 	}
 }

-func (l *Lexer) readString() string {
+func (l *Lexer) readString() (string, error) {
 	b := &strings.Builder{}
 	for {
 		l.readChar()
-		if l.ch == '\\' && l.peekChar() == '"' {
+		if l.ch == '\\' {
+			switch l.peekChar() {
+			case '"':
+				b.WriteByte('"')
+			case 'n':
+				b.WriteString("\n")
+			case 'r':
+				b.WriteString("\r")
+			case 't':
+				b.WriteString("\t")
+			case '\\':
+				b.WriteString("\\")
+			case 'x':
+				// Skip over the '\\', 'x' and the next two bytes (hex)
+				l.readChar()
+				l.readChar()
+				l.readChar()
+				src := string([]byte{l.prevCh, l.ch})
+				dst, err := hex.DecodeString(src)
+				if err != nil {
+					return "", err
+				}
+				b.Write(dst)
+				continue
+			}
+
+			// Skip over the '\\' and the matched single escape char
 			l.readChar()
+			continue
 		} else {
 			if l.ch == '"' || l.ch == 0 {
 				break
@@ -195,7 +229,7 @@ func (l *Lexer) readString() string {

 		b.WriteByte(l.ch)
 	}
-	return b.String()
+	return b.String(), nil
 }

 func (l *Lexer) readLine() string {
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@@ -158,3 +158,48 @@ func TestNextToken(t *testing.T) {
 		}
 	}
 }
+
+// TestStringEscapes checks that escape sequences inside string literals
+// (\", \xNN, \r, \n, \t) are decoded into the STRING token's literal.
+func TestStringEscapes(t *testing.T) {
+	// Raw string: the lexer receives the backslash sequences verbatim.
+	input := `#!./monkey-lang
+let a = "\"foo\""
+let b = "\x00\x0a\x7f"
+let c = "\r\n\t"
+`
+
+	tests := []struct {
+		expectedType    token.TokenType
+		expectedLiteral string
+	}{
+		{token.COMMENT, "!./monkey-lang"},
+		{token.LET, "let"},
+		{token.IDENT, "a"},
+		{token.ASSIGN, "="},
+		{token.STRING, "\"foo\""},
+		{token.LET, "let"},
+		{token.IDENT, "b"},
+		{token.ASSIGN, "="},
+		{token.STRING, "\x00\n\u007f"},
+		{token.LET, "let"},
+		{token.IDENT, "c"},
+		{token.ASSIGN, "="},
+		{token.STRING, "\r\n\t"},
+		{token.EOF, ""},
+	}
+
+	l := New(input)
+	for i, want := range tests {
+		// Named tok, not token, so the imported token package is not shadowed.
+		tok := l.NextToken()
+		if tok.Type != want.expectedType {
+			t.Fatalf("tests[%d] - token type wrong. expected=%q, got=%q",
+				i, want.expectedType, tok.Type)
+		}
+		if tok.Literal != want.expectedLiteral {
+			t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q",
+				i, want.expectedLiteral, tok.Literal)
+		}
+	}
+}