diff --git a/lexer/lexer.go b/lexer/lexer.go
index e123bfd..1896363 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,6 +1,7 @@
 package lexer
 
 import (
+	"encoding/hex"
 	"monkey/token"
 	"strings"
 )
@@ -119,7 +120,13 @@ func (l *Lexer) NextToken() token.Token {
 		tok.Type = token.EOF
 	case '"':
 		tok.Type = token.STRING
-		tok.Literal = l.readString()
+		str, err := l.readString()
+		if err != nil {
+			tok = newToken(token.ILLEGAL, l.prevCh)
+		} else {
+			tok.Type = token.STRING
+			tok.Literal = str
+		}
 	case '[':
 		tok = newToken(token.LBRACKET, l.ch)
 	case ']':
@@ -181,12 +188,43 @@ func (l *Lexer) skipWhitespace() {
 	}
 }
 
-func (l *Lexer) readString() string {
+func (l *Lexer) readString() (string, error) {
 	b := &strings.Builder{}
 	for {
 		l.readChar()
-		if l.ch == '\\' && l.peekChar() == '"' {
+		if l.ch == '\\' {
+			switch l.peekChar() {
+			case '"':
+				b.WriteByte('"')
+			case 'n':
+				b.WriteString("\n")
+			case 'r':
+				b.WriteString("\r")
+			case 't':
+				b.WriteString("\t")
+			case '\\':
+				b.WriteString("\\")
+			case 'x':
+				// Skip over the '\\', 'x' and the next two bytes (hex)
+				l.readChar()
+				l.readChar()
+				l.readChar()
+				src := string([]byte{l.prevCh, l.ch})
+				dst, err := hex.DecodeString(src)
+				if err != nil {
+					return "", err
+				}
+				b.Write(dst)
+				continue
+			default:
+				// Unknown escape: keep the '\\'; next char is lexed normally.
+				b.WriteByte('\\')
+				continue
+			}
+
+			// Skip over the '\\' and the matched single escape char
 			l.readChar()
+			continue
 		} else {
 			if l.ch == '"' || l.ch == 0 {
 				break
@@ -195,7 +233,7 @@ func (l *Lexer) readString() string {
 		}
 		b.WriteByte(l.ch)
 	}
-	return b.String()
+	return b.String(), nil
 }
 
 func (l *Lexer) readLine() string {
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
index cb4cf3f..79e3f06 100644
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@@ -158,3 +158,48 @@ func TestNextToken(t *testing.T) {
 		}
 	}
 }
+
+func TestStringEscapes(t *testing.T) {
+	input := `#!./monkey-lang
+let a = "\"foo\""
+let b = "\x00\x0a\x7f"
+let c = "\r\n\t"
+`
+
+	tests := []struct {
+		expectedType    token.TokenType
+		expectedLiteral string
+	}{
+		{token.COMMENT, "!./monkey-lang"},
+		{token.LET, "let"},
+		{token.IDENT, "a"},
+		{token.ASSIGN, "="},
+		{token.STRING, "\"foo\""},
+		{token.LET, "let"},
+		{token.IDENT, "b"},
+		{token.ASSIGN, "="},
+		{token.STRING, "\x00\n\u007f"},
+		{token.LET, "let"},
+		{token.IDENT, "c"},
+		{token.ASSIGN, "="},
+		{token.STRING, "\r\n\t"},
+		{token.EOF, ""},
+	}
+
+	lexer := New(input)
+
+	for i, test := range tests {
+		token := lexer.NextToken()
+
+		if token.Type != test.expectedType {
+			t.Fatalf("tests[%d] - token type wrong. expected=%q, got=%q",
+				i, test.expectedType, token.Type)
+		}
+
+		if token.Literal != test.expectedLiteral {
+			t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q",
+				i, test.expectedLiteral, token.Literal)
+		}
+	}
+
+}