140 lines
3.9 KiB
Go
140 lines
3.9 KiB
Go
package lexer
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
)
|
|
|
|
// regexPattern pairs a compiled regular expression with the handler that
// is invoked when the expression matches at the lexer's current position.
type regexPattern struct {
	regex   *regexp.Regexp
	handler regexHandler
}
|
|
|
|
// lexer holds the tokenizer state: the ordered pattern table, the input
// text, the current read offset, and the tokens produced so far.
type lexer struct {
	patterns []regexPattern // tried in order; first match at position 0 wins
	Tokens   []Token        // output token stream, returned by Tokenize
	source   string         // full input text
	position int            // byte offset into source of the next unread character
}
|
|
|
|
func createLexer(source string) *lexer {
|
|
lex := &lexer{
|
|
position: 0,
|
|
source: source,
|
|
Tokens: make([]Token, 0),
|
|
patterns: []regexPattern{
|
|
{regexp.MustCompile(`\s+`), skipHandler},
|
|
{regexp.MustCompile(`\+`), defaultHandler(PLUS, "+")},
|
|
{regexp.MustCompile(`\-`), defaultHandler(MINUS, "-")},
|
|
{regexp.MustCompile(`\:`), defaultHandler(COLON, ":")},
|
|
{regexp.MustCompile(`>=`), defaultHandler(MORE_EQUAL, ">=")},
|
|
{regexp.MustCompile(`>`), defaultHandler(MORE, ">")},
|
|
{regexp.MustCompile(`<=`), defaultHandler(LESS_EQUAL, "<=")},
|
|
{regexp.MustCompile(`<`), defaultHandler(LESS, "<")},
|
|
{regexp.MustCompile(`!=`), defaultHandler(NOT_EQUAL, "!=")},
|
|
{regexp.MustCompile(`=`), defaultHandler(EQUAL, "=")},
|
|
{regexp.MustCompile(`!`), defaultHandler(EXCLAMATION, "!")},
|
|
{regexp.MustCompile(`\band\b|\bAND\b`), defaultHandler(AND, "and")},
|
|
{regexp.MustCompile(`\bnot\b|\bNOT\b`), defaultHandler(NOT, "not")},
|
|
{regexp.MustCompile(`\bor\b|\bOR\b`), defaultHandler(OR, "or")},
|
|
{regexp.MustCompile(`\(`), defaultHandler(OPEN_BRACE, "(")},
|
|
{regexp.MustCompile(`\)`), defaultHandler(CLOSED_BRACE, ")")},
|
|
{regexp.MustCompile(`\|`), defaultHandler(PIPE, "|")},
|
|
{regexp.MustCompile(`//(.*)$`), commentHandler}, //defaultHandler(COMMENT, value string)},
|
|
{regexp.MustCompile(`[-+]?[0-9]`), numberHandler}, //NUMBER,
|
|
{regexp.MustCompile(`([0-9]*\.?[0-9]+)`), floatHandler}, //FLOAT_NUMBER
|
|
{regexp.MustCompile(`"([^"]*)"`), stringHandler}, //STRING_LITERAL,
|
|
{regexp.MustCompile(`\b\w+\b`), symbolHandler}, //SYMBOL
|
|
},
|
|
}
|
|
return lex
|
|
}
|
|
|
|
func (this *lexer) incrementPosition(n int) {
|
|
this.position += n
|
|
}
|
|
|
|
func (this *lexer) push(token Token) {
|
|
this.Tokens = append(this.Tokens, token)
|
|
this.incrementPosition(len(token.value))
|
|
}
|
|
|
|
// at returns the byte at the current read position.
// Indexing past the end of source panics, so check atEof first.
func (lex *lexer) at() byte {
	return lex.source[lex.position]
}
|
|
|
|
// currentString returns the not-yet-consumed remainder of the input,
// starting at the current read position.
func (lex *lexer) currentString() string {
	return lex.source[lex.position:]
}
|
|
|
|
// atEof reports whether the whole input has been consumed.
func (lex *lexer) atEof() bool {
	return lex.position >= len(lex.source)
}
|
|
|
|
// regexHandler consumes the text that regex matched at the lexer's current
// position — pushing a token, advancing the position, or both.
type regexHandler func(lex *lexer, regex *regexp.Regexp)
|
|
|
|
func defaultHandler(tokenType TokenType, value string) regexHandler {
|
|
return func(lex *lexer, regex *regexp.Regexp) {
|
|
lex.push(Token{tokenType, value})
|
|
}
|
|
}
|
|
|
|
func stringHandler(lex *lexer, regex *regexp.Regexp) {
|
|
match := regex.FindStringIndex(lex.currentString())
|
|
stringLiteral := lex.currentString()[match[0]:match[1]]
|
|
|
|
lex.push(Token{STRING_LITERAL, stringLiteral})
|
|
}
|
|
|
|
func numberHandler(lex *lexer, regex *regexp.Regexp) {
|
|
match := regex.FindString(lex.currentString())
|
|
lex.push(Token{NUMBER, match})
|
|
}
|
|
|
|
// floatHandler emits the decimal literal matched at the current position.
//
// NOTE(review): this pushes a NUMBER token exactly like numberHandler,
// although the pattern table labels this entry FLOAT_NUMBER — confirm
// whether a distinct FLOAT_NUMBER token type was intended here.
func floatHandler(lex *lexer, regex *regexp.Regexp) {
	match := regex.FindString(lex.currentString())
	lex.push(Token{NUMBER, match})
}
|
|
|
|
func symbolHandler(lex *lexer, regex *regexp.Regexp) {
|
|
//todo: if reserved keyword, insert "IDENTIFIER" token
|
|
match := regex.FindString(lex.currentString())
|
|
lex.push(Token{SYMBOL, match})
|
|
}
|
|
|
|
func skipHandler(lex *lexer, regex *regexp.Regexp) {
|
|
match := regex.FindStringIndex(lex.currentString())
|
|
lex.incrementPosition(match[1])
|
|
}
|
|
|
|
func commentHandler(lex *lexer, regex *regexp.Regexp) {
|
|
match := regex.FindStringIndex(lex.currentString())
|
|
if match != nil {
|
|
lex.incrementPosition(match[1])
|
|
}
|
|
}
|
|
|
|
func Tokenize(source string) []Token {
|
|
lex := createLexer(source)
|
|
|
|
for !lex.atEof() {
|
|
matched := false
|
|
|
|
for _, pattern := range lex.patterns {
|
|
location := pattern.regex.FindStringIndex(lex.currentString())
|
|
if location != nil && location[0] == 0 {
|
|
pattern.handler(lex, pattern.regex)
|
|
matched = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if !matched {
|
|
panic(fmt.Sprintf("lexer error: unrecognized token near '%v'", lex.currentString()))
|
|
}
|
|
}
|
|
|
|
lex.push(Token{EOF, "EOF"})
|
|
return lex.Tokens
|
|
}
|