fix: minor bugs && example

2025-12-07 01:17:31 +07:00
parent 26ae65f527
commit b2ba3c707f
3 changed files with 109 additions and 6 deletions

View File

@@ -1,7 +1,7 @@
 package lexer
 
 import (
-	//"fmt"
+	"fmt"
 	"regexp"
 )
@@ -23,7 +23,9 @@ func createLexer(source string) *lexer {
 		source:   source,
 		Tokens:   make([]Token, 0),
 		patterns: []regexPattern{
-			{regexp.MustCompile(`\s+`), defaultHandler(SPACE, " ")},
+			{regexp.MustCompile(`\s+`), skipHandler},
+			{regexp.MustCompile(`\+`), defaultHandler(PLUS, "+")},
+			{regexp.MustCompile(`\-`), defaultHandler(MINUS, "-")},
 			{regexp.MustCompile(`\:`), defaultHandler(COLON, ":")},
 			{regexp.MustCompile(`>=`), defaultHandler(MORE_EQUAL, ">=")},
 			{regexp.MustCompile(`>`), defaultHandler(MORE, ">")},
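
Editor's note: the new `skipHandler` is referenced here but defined outside this diff. A minimal sketch of what it plausibly does, assuming the handler signature and `incrementPosition` helper visible in `commentHandler` below (hypothetical, not part of this commit):

```go
// Hypothetical sketch: advance past the matched whitespace without
// pushing a token, so runs of spaces never enter the token stream.
func skipHandler(lex *lexer, regex *regexp.Regexp) {
	match := regex.FindStringIndex(lex.currentString())
	lex.incrementPosition(match[1]) // consume the match, emit nothing
}
```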
@@ -111,3 +113,27 @@ func commentHandler(lex *lexer, regex *regexp.Regexp) {
 		lex.incrementPosition(match[1])
 	}
 }
+
+func Tokenize(source string) []Token {
+	lex := createLexer(source)
+
+	for !lex.atEof() {
+		matched := false
+		for _, pattern := range lex.patterns {
+			location := pattern.regex.FindStringIndex(lex.currentString())
+			if location != nil && location[0] == 0 {
+				pattern.handler(lex, pattern.regex)
+				matched = true
+				break
+			}
+		}
+
+		if !matched {
+			panic(fmt.Sprintf("lexer error: unrecognized token near '%v'", lex.currentString()))
+		}
+	}
+
+	lex.push(Token{EOF, "EOF"})
+	return lex.Tokens
+}
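
Editor's note: `Tokenize` tries patterns in declaration order and takes the first one that matches at the current position, which is why `>=` is registered before `>` above. A small sanity-check sketch (a hypothetical test file, not part of this commit; it assumes the full pattern table, only partly visible in this diff, also covers bare symbols and numbers, and that the test sits in package `lexer` so the unexported `tokenType` field is reachable):

```go
package lexer

import "testing"

// Verifies pattern ordering: ">=" must lex as a single MORE_EQUAL token
// (not ">" then "="), and the stream must end with EOF.
func TestTokenizeOperatorOrder(t *testing.T) {
	tokens := Tokenize("a >= 1")

	if last := tokens[len(tokens)-1]; last.tokenType != EOF {
		t.Fatalf("expected trailing EOF, got %s", TokenKindString(last.tokenType))
	}

	found := false
	for _, tok := range tokens {
		found = found || tok.tokenType == MORE_EQUAL
		if tok.tokenType == MORE {
			t.Fatalf("'>=' was split: got a bare MORE token")
		}
	}
	if !found {
		t.Fatalf("expected a MORE_EQUAL token")
	}
}
```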

View File

@@ -1,5 +1,7 @@
 package lexer
 
+import "fmt"
+
 // what can appear in a query?
 // string literals
 // colons
@@ -17,7 +19,8 @@ package lexer
 type TokenType int
 
 const (
-	SYMBOL TokenType = iota
+	EOF TokenType = iota
+	SYMBOL
 	COLON
 	EXCLAMATION
 	EQUAL
@@ -31,6 +34,8 @@ const (
 	LESS_EQUAL
 	OPEN_BRACE
 	CLOSED_BRACE
+	MINUS
+	PLUS
 	COMMENT
 	PIPE
 	NUMBER
@@ -44,6 +49,73 @@ type Token struct {
 	value string
 }
 
-func Parse(str string) {
+func TokenKindString(kind TokenType) string {
+	switch kind {
+	case EOF:
+		return "eof"
+	case SYMBOL:
+		return "symbol"
+	case NUMBER:
+		return "number"
+	case STRING_LITERAL:
+		return "string"
+	case FLOAT_NUMBER:
+		return "float"
+	case OPEN_BRACE:
+		return "open_paren"
+	case CLOSED_BRACE:
+		return "close_paren"
+	case EQUAL:
+		return "equals"
+	case NOT_EQUAL:
+		return "not_equals"
+	case NOT:
+		return "not"
+	case OR:
+		return "or"
+	case AND:
+		return "and"
+	case COLON:
+		return "colon"
+	case EXCLAMATION:
+		return "exclamation"
+	case MORE:
+		return "more"
+	case LESS:
+		return "less"
+	case MORE_EQUAL:
+		return "more_equal"
+	case LESS_EQUAL:
+		return "less_equal"
+	case COMMENT:
+		return "comment"
+	case PIPE:
+		return "pipe"
+	case MINUS:
+		return "minus"
+	case PLUS:
+		return "plus"
+	case SPACE:
+		return "space"
+	default:
+		return fmt.Sprintf("unknown(%d)", kind)
+	}
+}
+
+func (tk Token) IsOneOfMany(expectedTokens ...TokenType) bool {
+	for _, expected := range expectedTokens {
+		if expected == tk.tokenType {
+			return true
+		}
+	}
+	return false
+}
+
+func Debug(token Token) {
+	if token.tokenType == SYMBOL || token.tokenType == NUMBER || token.tokenType == STRING_LITERAL {
+		fmt.Printf("%s(%s)\n", TokenKindString(token.tokenType), token.value)
+	} else {
+		fmt.Printf("%s()\n", TokenKindString(token.tokenType))
+	}
 }
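
Editor's note: `IsOneOfMany` has no caller in this commit yet; a hypothetical caller-side sketch of how a future parser might use it (the names `tok` and `parseComparison` are assumptions, not from this repo):

```go
// Hypothetical sketch, not part of this commit.
if tok.IsOneOfMany(EQUAL, NOT_EQUAL, MORE, MORE_EQUAL, LESS, LESS_EQUAL) {
	parseComparison(tok) // handle any comparison operator with one check
}
```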

View File

@@ -1,5 +1,10 @@
 package main
 
+import "github.com/e1lama/spl/lexer"
+
 func main() {
+	tokens := lexer.Tokenize(`index=security (action=failed OR action=denied or action=accepted) host!="localhost" | stats count by dest_ip | sort -count`)
+	for _, token := range tokens {
+		lexer.Debug(token)
+	}
 }
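
Editor's note: for reference, a guess at the first few lines this example would print, assuming the full pattern table (only partly visible in this diff) lexes bare words as SYMBOL, `=` as EQUAL, and `(` as OPEN_BRACE; the `kind(value)` format comes from `Debug` above:

```
symbol(index)
equals()
symbol(security)
open_paren()
symbol(action)
...
```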