diff --git a/lexer/lexer.go b/lexer/lexer.go
index ed68864..6366766 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,7 +1,7 @@
 package lexer
 
 import (
-    //"fmt"
+    "fmt"
     "regexp"
 )
 
@@ -23,7 +23,9 @@ func createLexer(source string) *lexer {
         source:   source,
         Tokens:   make([]Token, 0),
         patterns: []regexPattern{
-            {regexp.MustCompile(`\s+`), defaultHandler(SPACE, " ")},
+            {regexp.MustCompile(`\s+`), skipHandler},
+            {regexp.MustCompile(`\+`), defaultHandler(PLUS, "+")},
+            {regexp.MustCompile(`\-`), defaultHandler(MINUS, "-")},
             {regexp.MustCompile(`\:`), defaultHandler(COLON, ":")},
             {regexp.MustCompile(`>=`), defaultHandler(MORE_EQUAL, ">=")},
             {regexp.MustCompile(`>`), defaultHandler(MORE, ">")},
@@ -111,3 +113,27 @@ func commentHandler(lex *lexer, regex *regexp.Regexp) {
         lex.incrementPosition(match[1])
     }
 }
+
+func Tokenize(source string) []Token {
+    lex := createLexer(source)
+
+    for !lex.atEof() {
+        matched := false
+
+        for _, pattern := range lex.patterns {
+            location := pattern.regex.FindStringIndex(lex.currentString())
+            if location != nil && location[0] == 0 {
+                pattern.handler(lex, pattern.regex)
+                matched = true
+                break
+            }
+        }
+
+        if !matched {
+            panic(fmt.Sprintf("lexer error: unrecognized token near '%v'", lex.currentString()))
+        }
+    }
+
+    lex.push(Token{EOF, "EOF"})
+    return lex.Tokens
+}
diff --git a/lexer/tokens.go b/lexer/tokens.go
index 183c7fb..e638c24 100644
--- a/lexer/tokens.go
+++ b/lexer/tokens.go
@@ -1,5 +1,7 @@
 package lexer
 
+import "fmt"
+
 // what can appear in a query?
 // string literals
 // colons
@@ -17,7 +19,8 @@ package lexer
 type TokenType int
 
 const (
-    SYMBOL TokenType = iota
+    EOF TokenType = iota
+    SYMBOL
     COLON
     EXCLAMATION
     EQUAL
@@ -31,6 +34,8 @@ const (
     LESS_EQUAL
     OPEN_BRACE
     CLOSED_BRACE
+    MINUS
+    PLUS
     COMMENT
     PIPE
     NUMBER
@@ -44,6 +49,73 @@ type Token struct {
     value string
 }
 
-func Parse(str string) {
-
+func TokenKindString(kind TokenType) string {
+    switch kind {
+    case EOF:
+        return "eof"
+    case SYMBOL:
+        return "symbol"
+    case NUMBER:
+        return "number"
+    case STRING_LITERAL:
+        return "string"
+    case FLOAT_NUMBER:
+        return "float"
+    case OPEN_BRACE:
+        return "open_paren"
+    case CLOSED_BRACE:
+        return "close_paren"
+    case EQUAL:
+        return "equals"
+    case NOT_EQUAL:
+        return "not_equals"
+    case NOT:
+        return "not"
+    case OR:
+        return "or"
+    case AND:
+        return "and"
+    case COLON:
+        return "colon"
+    case EXCLAMATION:
+        return "exclamation"
+    case MORE:
+        return "more"
+    case LESS:
+        return "less"
+    case MORE_EQUAL:
+        return "more_equal"
+    case LESS_EQUAL:
+        return "less_equal"
+    case COMMENT:
+        return "comment"
+    case PIPE:
+        return "pipe"
+    case MINUS:
+        return "minus"
+    case PLUS:
+        return "plus"
+    case SPACE:
+        return "space"
+    default:
+        return fmt.Sprintf("unknown(%d)", kind)
+    }
+}
+
+func (tk Token) IsOneOfMany(expectedTokens ...TokenType) bool {
+    for _, expected := range expectedTokens {
+        if expected == tk.tokenType {
+            return true
+        }
+    }
+
+    return false
+}
+
+func Debug(token Token) {
+    if token.tokenType == SYMBOL || token.tokenType == NUMBER || token.tokenType == STRING_LITERAL {
+        fmt.Printf("%s(%s)\n", TokenKindString(token.tokenType), token.value)
+    } else {
+        fmt.Printf("%s()\n", TokenKindString(token.tokenType))
+    }
 }
diff --git a/main.go b/main.go
index 7905807..624aa7e 100644
--- a/main.go
+++ b/main.go
@@ -1,5 +1,10 @@
 package main
 
-func main() {
+import "github.com/e1lama/spl/lexer"
 
+func main() {
+    tokens := lexer.Tokenize(`index=security (action=failed OR action=denied or action=accepted) host!="localhost" | stats count by dest_ip | sort -count`)
+    for _, token := range tokens {
+        lexer.Debug(token)
+    }
 }
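
A quick sanity check for the new Tokenize entry point can live in a small in-package test. The sketch below is not part of the patch and rests on two assumptions: that a pattern for bare identifiers (SYMBOL) exists among the handlers outside these hunks, and that skipHandler drops whitespace instead of emitting SPACE tokens. The file name lexer/lexer_test.go is illustrative.

// lexer/lexer_test.go
package lexer

import "testing"

// TestTokenizeBasic verifies that Tokenize emits the expected token kinds
// for a tiny input and terminates the stream with EOF.
func TestTokenizeBasic(t *testing.T) {
    tokens := Tokenize("a + b - c")

    want := []TokenType{SYMBOL, PLUS, SYMBOL, MINUS, SYMBOL, EOF}
    if len(tokens) != len(want) {
        t.Fatalf("got %d tokens, want %d", len(tokens), len(want))
    }
    for i, token := range tokens {
        // IsOneOfMany with a single argument doubles as a plain equality check.
        if !token.IsOneOfMany(want[i]) {
            t.Errorf("token %d: got %s, want %s",
                i, TokenKindString(token.tokenType), TokenKindString(want[i]))
        }
    }
}

Run it with go test ./lexer; mismatches are reported by the human-readable kind names from TokenKindString.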