// Package lexer turns source text into a flat stream of Tokens by
// matching a prioritized list of regular expressions against the input.
package lexer

import (
	"fmt"
	"regexp"
)

// regexPattern pairs a compiled regex with the handler invoked when the
// regex matches at the current lexer position.
type regexPattern struct {
	regex   *regexp.Regexp
	handler regexHandler
}

// lexer holds the scanning state: the pattern table, the tokens emitted
// so far, the input text, and the current byte offset into it.
type lexer struct {
	patterns []regexPattern
	Tokens   []Token
	source   string
	position int
}

// createLexer builds a lexer over source with the full pattern table.
// Order matters: multi-character operators (>=, <=, !=) must precede
// their single-character prefixes, and the float pattern must precede
// the integer pattern so "1.5" is one token, not "1" then ".5".
func createLexer(source string) *lexer {
	return &lexer{
		position: 0,
		source:   source,
		Tokens:   make([]Token, 0),
		patterns: []regexPattern{
			{regexp.MustCompile(`\s+`), skipHandler},
			{regexp.MustCompile(`\+`), defaultHandler(PLUS, "+")},
			{regexp.MustCompile(`-`), defaultHandler(MINUS, "-")},
			{regexp.MustCompile(`:`), defaultHandler(COLON, ":")},
			{regexp.MustCompile(`>=`), defaultHandler(MORE_EQUAL, ">=")},
			{regexp.MustCompile(`>`), defaultHandler(MORE, ">")},
			{regexp.MustCompile(`<=`), defaultHandler(LESS_EQUAL, "<=")},
			{regexp.MustCompile(`<`), defaultHandler(LESS, "<")},
			{regexp.MustCompile(`!=`), defaultHandler(NOT_EQUAL, "!=")},
			{regexp.MustCompile(`=`), defaultHandler(EQUAL, "=")},
			{regexp.MustCompile(`!`), defaultHandler(EXCLAMATION, "!")},
			// Keyword tokens are pushed with the lowercase spelling even when
			// the uppercase form matched; both spellings have the same length,
			// so push()'s position bookkeeping stays correct.
			{regexp.MustCompile(`\band\b|\bAND\b`), defaultHandler(AND, "and")},
			{regexp.MustCompile(`\bnot\b|\bNOT\b`), defaultHandler(NOT, "not")},
			{regexp.MustCompile(`\bor\b|\bOR\b`), defaultHandler(OR, "or")},
			{regexp.MustCompile(`\(`), defaultHandler(OPEN_BRACE, "(")},
			{regexp.MustCompile(`\)`), defaultHandler(CLOSED_BRACE, ")")},
			{regexp.MustCompile(`\|`), defaultHandler(PIPE, "|")},
			// Was `//(.*)$`: without (?m), Go's RE2 `$` only matches at end
			// of text, so a comment followed by a newline matched nothing and
			// the stray "//" made Tokenize panic. `[^\n]*` stops at line end.
			{regexp.MustCompile(`//[^\n]*`), commentHandler},
			// Float must be tried before integer and must require the dot;
			// otherwise "1.5" would lex as integer "1" followed by ".5".
			{regexp.MustCompile(`[0-9]*\.[0-9]+`), floatHandler},
			// Was `[-+]?[0-9]`: that consumed a single digit, splitting "123"
			// into three tokens. The sign prefix was dead code because the
			// PLUS/MINUS patterns above always match a leading sign first.
			{regexp.MustCompile(`[0-9]+`), numberHandler},
			{regexp.MustCompile(`"([^"]*)"`), stringHandler},
			{regexp.MustCompile(`\b\w+\b`), symbolHandler},
		},
	}
}

// incrementPosition advances the scan cursor by n bytes.
func (lex *lexer) incrementPosition(n int) {
	lex.position += n
}

// push records token and advances the cursor by the length of its value.
// NOTE(review): this assumes len(token.value) equals the number of input
// bytes consumed — true for every current handler.
func (lex *lexer) push(token Token) {
	lex.Tokens = append(lex.Tokens, token)
	lex.incrementPosition(len(token.value))
}

// at returns the byte at the current position.
func (lex *lexer) at() byte {
	return lex.source[lex.position]
}

// currentString returns the unconsumed remainder of the input.
func (lex *lexer) currentString() string {
	return lex.source[lex.position:]
}

// atEof reports whether the entire input has been consumed.
func (lex *lexer) atEof() bool {
	return lex.position >= len(lex.source)
}

// regexHandler consumes the text its regex matched at the current
// position, usually by pushing a token.
type regexHandler func(lex *lexer, regex *regexp.Regexp)

// defaultHandler returns a handler that emits a fixed token; used for
// operators and keywords whose text is constant.
func defaultHandler(tokenType TokenType, value string) regexHandler {
	return func(lex *lexer, regex *regexp.Regexp) {
		lex.push(Token{tokenType, value})
	}
}

// stringHandler emits a STRING_LITERAL token, surrounding quotes included
// (keeping the quotes keeps push()'s position accounting correct).
func stringHandler(lex *lexer, regex *regexp.Regexp) {
	match := regex.FindStringIndex(lex.currentString())
	lex.push(Token{STRING_LITERAL, lex.currentString()[match[0]:match[1]]})
}

// numberHandler emits an integer literal as a NUMBER token.
func numberHandler(lex *lexer, regex *regexp.Regexp) {
	lex.push(Token{NUMBER, regex.FindString(lex.currentString())})
}

// floatHandler emits a floating-point literal. It currently reuses the
// NUMBER token type; introduce a dedicated float TokenType if the parser
// ever needs to distinguish the two.
func floatHandler(lex *lexer, regex *regexp.Regexp) {
	lex.push(Token{NUMBER, regex.FindString(lex.currentString())})
}

// symbolHandler emits a SYMBOL token for identifiers.
// TODO: map reserved keywords to their own token types here.
func symbolHandler(lex *lexer, regex *regexp.Regexp) {
	lex.push(Token{SYMBOL, regex.FindString(lex.currentString())})
}

// skipHandler discards matched whitespace without emitting a token.
func skipHandler(lex *lexer, regex *regexp.Regexp) {
	match := regex.FindStringIndex(lex.currentString())
	lex.incrementPosition(match[1])
}

// commentHandler discards a //-comment through the end of its line.
func commentHandler(lex *lexer, regex *regexp.Regexp) {
	if match := regex.FindStringIndex(lex.currentString()); match != nil {
		lex.incrementPosition(match[1])
	}
}

// Tokenize scans source and returns its tokens, terminated by an EOF
// token. Patterns are tried in table order and must match at the current
// position; Tokenize panics on input no pattern recognizes.
func Tokenize(source string) []Token {
	lex := createLexer(source)
	for !lex.atEof() {
		matched := false
		for _, pattern := range lex.patterns {
			location := pattern.regex.FindStringIndex(lex.currentString())
			if location != nil && location[0] == 0 {
				pattern.handler(lex, pattern.regex)
				matched = true
				break
			}
		}
		if !matched {
			panic(fmt.Sprintf("lexer error: unrecognized token near '%v'", lex.currentString()))
		}
	}
	lex.push(Token{EOF, "EOF"})
	return lex.Tokens
}