Files
spl/lexer/lexer.go
2025-12-07 01:17:31 +07:00

140 lines
3.9 KiB
Go

package lexer
import (
"fmt"
"regexp"
)
// regexPattern pairs a compiled regex with the handler invoked when that
// regex matches at the lexer's current position.
type regexPattern struct {
	regex   *regexp.Regexp
	handler regexHandler
}
// lexer holds scanning state: the ordered pattern table (earlier entries
// win), the tokens emitted so far, the input text, and the current byte
// offset into that text.
type lexer struct {
	patterns []regexPattern
	Tokens   []Token
	source   string
	position int
}
// createLexer builds a lexer over source with its token patterns in
// match-priority order: earlier entries win, so multi-character operators
// (>=, <=, !=) must precede their single-character prefixes (>, <, =, !),
// keywords must precede the catch-all symbol pattern, and floats must
// precede integers.
func createLexer(source string) *lexer {
	lex := &lexer{
		position: 0,
		source:   source,
		Tokens:   make([]Token, 0),
		patterns: []regexPattern{
			{regexp.MustCompile(`\s+`), skipHandler},
			{regexp.MustCompile(`\+`), defaultHandler(PLUS, "+")},
			{regexp.MustCompile(`\-`), defaultHandler(MINUS, "-")},
			{regexp.MustCompile(`\:`), defaultHandler(COLON, ":")},
			{regexp.MustCompile(`>=`), defaultHandler(MORE_EQUAL, ">=")},
			{regexp.MustCompile(`>`), defaultHandler(MORE, ">")},
			{regexp.MustCompile(`<=`), defaultHandler(LESS_EQUAL, "<=")},
			{regexp.MustCompile(`<`), defaultHandler(LESS, "<")},
			{regexp.MustCompile(`!=`), defaultHandler(NOT_EQUAL, "!=")},
			{regexp.MustCompile(`=`), defaultHandler(EQUAL, "=")},
			{regexp.MustCompile(`!`), defaultHandler(EXCLAMATION, "!")},
			{regexp.MustCompile(`\band\b|\bAND\b`), defaultHandler(AND, "and")},
			{regexp.MustCompile(`\bnot\b|\bNOT\b`), defaultHandler(NOT, "not")},
			{regexp.MustCompile(`\bor\b|\bOR\b`), defaultHandler(OR, "or")},
			{regexp.MustCompile(`\(`), defaultHandler(OPEN_BRACE, "(")},
			{regexp.MustCompile(`\)`), defaultHandler(CLOSED_BRACE, ")")},
			{regexp.MustCompile(`\|`), defaultHandler(PIPE, "|")},
			// `[^\n]*` rather than `(.*)$`: without the (?m) flag, Go's `$`
			// only matches at end of text, so the old pattern failed for any
			// comment that was not the very last thing in the input.
			{regexp.MustCompile(`//[^\n]*`), commentHandler},
			// Floats (mandatory dot) before integers so "1.5" lexes as one
			// token instead of "1" followed by an unmatchable ".". The
			// integer pattern takes a whole digit run — the old `[-+]?[0-9]`
			// matched a single digit, splitting "123" into three tokens.
			// Its sign prefix was dead code anyway: the PLUS/MINUS patterns
			// above always claim a leading sign first.
			{regexp.MustCompile(`[0-9]*\.[0-9]+`), floatHandler},
			{regexp.MustCompile(`[0-9]+`), numberHandler},
			{regexp.MustCompile(`"([^"]*)"`), stringHandler},
			{regexp.MustCompile(`\b\w+\b`), symbolHandler},
		},
	}
	return lex
}
// incrementPosition advances the read cursor by n bytes.
// Receiver is `lex`, matching the other methods of this type (Go
// convention avoids `this`/`self` receiver names).
func (lex *lexer) incrementPosition(n int) {
	lex.position += n
}
// push appends token to the output and advances the cursor by the length
// of the token's value. That length must equal the matched source text
// for the cursor to stay in sync; handlers that push a value shorter than
// the match must advance the remainder themselves.
// Receiver is `lex`, matching the other methods of this type.
func (lex *lexer) push(token Token) {
	lex.Tokens = append(lex.Tokens, token)
	lex.incrementPosition(len(token.value))
}
// at returns the byte under the cursor. The caller must ensure the
// lexer is not at EOF; indexing past the end panics.
func (lex *lexer) at() byte {
	current := lex.source[lex.position]
	return current
}
// currentString returns the not-yet-consumed tail of the input,
// starting at the cursor.
func (lex *lexer) currentString() string {
	remainder := lex.source[lex.position:]
	return remainder
}
// atEof reports whether the cursor has consumed the whole input.
func (lex *lexer) atEof() bool {
	return len(lex.source) <= lex.position
}
// regexHandler consumes a match found at the lexer's current position and
// updates the lexer state (emitting a token and/or advancing the cursor).
type regexHandler func(lex *lexer, regex *regexp.Regexp)

// defaultHandler builds a handler that emits a fixed token. Because push
// advances the cursor by len(value), the fixed value must be exactly as
// long as the text the pattern matches.
func defaultHandler(tokenType TokenType, value string) regexHandler {
	return func(lex *lexer, _ *regexp.Regexp) {
		lex.push(Token{tokenType, value})
	}
}
// stringHandler emits a STRING_LITERAL token for a quoted string.
// The pushed value keeps its surrounding quotes, so push advances the
// cursor past the closing quote as well.
func stringHandler(lex *lexer, regex *regexp.Regexp) {
	literal := regex.FindString(lex.currentString())
	lex.push(Token{STRING_LITERAL, literal})
}
// numberHandler emits a NUMBER token for the matched digit text.
func numberHandler(lex *lexer, regex *regexp.Regexp) {
	lexeme := regex.FindString(lex.currentString())
	lex.push(Token{NUMBER, lexeme})
}
// floatHandler emits a token for a float literal.
// NOTE(review): this pushes NUMBER, although the pattern-table comment
// mentions FLOAT_NUMBER — confirm whether a distinct float token type
// was intended.
func floatHandler(lex *lexer, regex *regexp.Regexp) {
	lexeme := regex.FindString(lex.currentString())
	lex.push(Token{NUMBER, lexeme})
}
// symbolHandler emits a SYMBOL token for an identifier-like word.
// TODO: if reserved keyword, insert "IDENTIFIER" token
func symbolHandler(lex *lexer, regex *regexp.Regexp) {
	word := regex.FindString(lex.currentString())
	lex.push(Token{SYMBOL, word})
}
// skipHandler advances the cursor past the match (whitespace) without
// emitting any token.
func skipHandler(lex *lexer, regex *regexp.Regexp) {
	loc := regex.FindStringIndex(lex.currentString())
	lex.incrementPosition(loc[1])
}
// commentHandler advances the cursor past a line comment without
// emitting any token. The nil guard is defensive: Tokenize only invokes
// a handler after confirming a match at the current position.
func commentHandler(lex *lexer, regex *regexp.Regexp) {
	if loc := regex.FindStringIndex(lex.currentString()); loc != nil {
		lex.incrementPosition(loc[1])
	}
}
// Tokenize scans source into a token slice terminated by an EOF token.
// Patterns are tried in declaration order and must match exactly at the
// cursor; a position no pattern matches causes a panic.
func Tokenize(source string) []Token {
	lex := createLexer(source)
	for !lex.atEof() {
		remainder := lex.currentString()
		matched := false
		for _, p := range lex.patterns {
			loc := p.regex.FindStringIndex(remainder)
			if loc == nil || loc[0] != 0 {
				continue
			}
			p.handler(lex, p.regex)
			matched = true
			break
		}
		if !matched {
			panic(fmt.Sprintf("lexer error: unrecognized token near '%v'", lex.currentString()))
		}
	}
	lex.push(Token{EOF, "EOF"})
	return lex.Tokens
}