commit ccf63a28ee6a4ab1d9e1fa1a264a36fd465715e8 Author: Chuck Smith Date: Sun Jan 14 21:02:54 2024 -0500 initial monkey lexer diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b5bf145 --- /dev/null +++ b/.gitignore @@ -0,0 +1,78 @@ +### JetBrains+all template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..09944b1 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/monkey.iml b/.idea/monkey.iml new file mode 100644 index 0000000..5e764c4 --- /dev/null +++ b/.idea/monkey.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..3c6204e --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module monkey + +go 1.21 diff --git a/lexer/lexer.go b/lexer/lexer.go new file mode 100644 index 0000000..d04bd69 --- /dev/null +++ b/lexer/lexer.go @@ -0,0 +1,106 @@ +package lexer + +import "monkey/token" + +type Lexer struct { + input string + position int // current position in input (point to current char) + readPosition int // current reading position in input (after current char) + ch byte // 
current char under examination
0000000..42a604e --- /dev/null +++ b/lexer/lexer_test.go @@ -0,0 +1,77 @@ +package lexer + +import ( + "monkey/token" + "testing" +) + +func TestNextToken(t *testing.T) { + input := `let five = 5; +let ten = 10; + +let add = fn(x, y) { + x + y; +}; + +let result = add(five, ten); +` + + tests := []struct { + expectedType token.TokenType + expectedLiteral string + }{ + {token.LET, "let"}, + {token.IDENT, "five"}, + {token.ASSIGN, "="}, + {token.INT, "5"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "ten"}, + {token.ASSIGN, "="}, + {token.INT, "10"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "add"}, + {token.ASSIGN, "="}, + {token.FUNCTION, "fn"}, + {token.LPAREN, "("}, + {token.IDENT, "x"}, + {token.COMMA, ","}, + {token.IDENT, "y"}, + {token.RPAREN, ")"}, + {token.LBRACE, "{"}, + {token.IDENT, "x"}, + {token.PLUS, "+"}, + {token.IDENT, "y"}, + {token.SEMICOLON, ";"}, + {token.RBRACE, "}"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "result"}, + {token.ASSIGN, "="}, + {token.IDENT, "add"}, + {token.LPAREN, "("}, + {token.IDENT, "five"}, + {token.COMMA, ","}, + {token.IDENT, "ten"}, + {token.RPAREN, ")"}, + {token.SEMICOLON, ";"}, + {token.EOF, ""}, + } + + l := New(input) + + for i, tt := range tests { + tok := l.NextToken() + + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", + i, tt.expectedType, tok.Type) + } + + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - literal wrong. 
expected=%q, got=%q", + i, tt.expectedLiteral, tok.Literal) + } + } +} diff --git a/token/token.go b/token/token.go new file mode 100644 index 0000000..76b951a --- /dev/null +++ b/token/token.go @@ -0,0 +1,46 @@ +package token + +type TokenType string + +type Token struct { + Type TokenType + Literal string +} + +const ( + ILLEGAL = "ILLEGAL" + EOF = "EOF" + + // Identifiers + literals + IDENT = "IDENT" // add, foobar, x, y + INT = "INT" // 123456 + + // Operators + ASSIGN = "=" + PLUS = "+" + + // Delimiters + COMMA = "," + SEMICOLON = ";" + + LPAREN = "(" + RPAREN = ")" + LBRACE = "{" + RBRACE = "}" + + // Keywords + FUNCTION = "FUNCTION" + LET = "LET" +) + +var keywords = map[string]TokenType{ + "fn": FUNCTION, + "let": LET, +} + +func LookupIdent(ident string) TokenType { + if tok, ok := keywords[ident]; ok { + return tok + } + return IDENT +}