fix: minor bugs && example

2025-12-07 01:17:31 +07:00
parent 26ae65f527
commit b2ba3c707f
3 changed files with 109 additions and 6 deletions

View File

@@ -1,7 +1,7 @@
 package lexer
 import (
-	//"fmt"
+	"fmt"
 	"regexp"
 )
@@ -23,7 +23,9 @@ func createLexer(source string) *lexer {
 		source:   source,
 		Tokens:   make([]Token, 0),
 		patterns: []regexPattern{
-			{regexp.MustCompile(`\s+`), defaultHandler(SPACE, " ")},
+			{regexp.MustCompile(`\s+`), skipHandler},
+			{regexp.MustCompile(`\+`), defaultHandler(PLUS, "+")},
+			{regexp.MustCompile(`\-`), defaultHandler(MINUS, "-")},
 			{regexp.MustCompile(`\:`), defaultHandler(COLON, ":")},
 			{regexp.MustCompile(`>=`), defaultHandler(MORE_EQUAL, ">=")},
 			{regexp.MustCompile(`>`), defaultHandler(MORE, ">")},
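
Note: skipHandler itself is outside this hunk's context, so what follows is only a plausible sketch, assuming it reuses the helpers already visible in this diff (currentString, incrementPosition). Unlike defaultHandler, it would consume the match without pushing a token, which is why the SPACE registration above could be dropped:

func skipHandler(lex *lexer, regex *regexp.Regexp) {
	// Advance past the matched whitespace; emit no token.
	match := regex.FindStringIndex(lex.currentString())
	lex.incrementPosition(match[1])
}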
@@ -111,3 +113,27 @@ func commentHandler(lex *lexer, regex *regexp.Regexp) {
 		lex.incrementPosition(match[1])
 	}
 }
+
+func Tokenize(source string) []Token {
+	lex := createLexer(source)
+	for !lex.atEof() {
+		matched := false
+		for _, pattern := range lex.patterns {
+			location := pattern.regex.FindStringIndex(lex.currentString())
+			if location != nil && location[0] == 0 {
+				pattern.handler(lex, pattern.regex)
+				matched = true
+				break
+			}
+		}
+		if !matched {
+			panic(fmt.Sprintf("lexer error: unrecognized token near '%v'", lex.currentString()))
+		}
+	}
+	lex.push(Token{EOF, "EOF"})
+	return lex.Tokens
+}
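
The new Tokenize loop tries each pattern at position 0 of the remaining input and panics on anything unmatched, so a quick external smoke test is easy to write. A minimal sketch, assuming the module path github.com/e1lama/spl implied by the import in the main package diff below, and that the registered patterns cover the query fragment:

package lexer_test

import (
	"testing"

	"github.com/e1lama/spl/lexer"
)

func TestTokenizeEndsWithEOF(t *testing.T) {
	// Reuses a fragment of the query from the main package example below.
	tokens := lexer.Tokenize(`host!="localhost" | stats count`)
	if len(tokens) == 0 || !tokens[len(tokens)-1].IsOneOfMany(lexer.EOF) {
		t.Fatalf("expected token stream to end with EOF, got %v", tokens)
	}
}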

View File

@@ -1,5 +1,7 @@
 package lexer
+
+import "fmt"
 
 // what is in the query?
 // string literals
 // colons
@@ -17,7 +19,8 @@ package lexer
 type TokenType int
 const (
-	SYMBOL TokenType = iota
+	EOF TokenType = iota
+	SYMBOL
 	COLON
 	EXCLAMATION
 	EQUAL
@@ -31,6 +34,8 @@ const (
 	LESS_EQUAL
 	OPEN_BRACE
 	CLOSED_BRACE
+	MINUS
+	PLUS
 	COMMENT
 	PIPE
 	NUMBER
@@ -44,6 +49,73 @@ type Token struct {
 	value string
 }
 
-func Parse(str string) {
+func TokenKindString(kind TokenType) string {
+	switch kind {
+	case EOF:
+		return "eof"
+	case SYMBOL:
+		return "symbol"
+	case NUMBER:
+		return "number"
+	case STRING_LITERAL:
+		return "string"
+	case FLOAT_NUMBER:
+		return "float"
+	case OPEN_BRACE:
+		return "open_paren"
+	case CLOSED_BRACE:
+		return "close_paren"
+	case EQUAL:
+		return "equals"
+	case NOT_EQUAL:
+		return "not_equals"
+	case NOT:
+		return "not"
+	case OR:
+		return "or"
+	case AND:
+		return "and"
+	case COLON:
+		return "colon"
+	case EXCLAMATION:
+		return "exclamation"
+	case MORE:
+		return "more"
+	case LESS:
+		return "less"
+	case MORE_EQUAL:
+		return "more_equal"
+	case LESS_EQUAL:
+		return "less_equal"
+	case COMMENT:
+		return "comment"
+	case PIPE:
+		return "pipe"
+	case MINUS:
+		return "minus"
+	case PLUS:
+		return "plus"
+	case SPACE:
+		return "space"
+	default:
+		return fmt.Sprintf("unknown(%d)", kind)
+	}
+}
+
+func (tk Token) IsOneOfMany(expectedTokens ...TokenType) bool {
+	for _, expected := range expectedTokens {
+		if expected == tk.tokenType {
+			return true
+		}
+	}
+	return false
+}
+
+func Debug(token Token) {
+	if token.tokenType == SYMBOL || token.tokenType == NUMBER || token.tokenType == STRING_LITERAL {
+		fmt.Printf("%s(%s)\n", TokenKindString(token.tokenType), token.value)
+	} else {
+		fmt.Printf("%s()\n", TokenKindString(token.tokenType))
+	}
+}
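
IsOneOfMany reads like it is meant for parser-side lookahead checks. For example (a hypothetical helper, not part of this commit), it can group the comparison operators declared above:

// isComparison reports whether tok is one of the comparison operator tokens.
func isComparison(tok Token) bool {
	return tok.IsOneOfMany(EQUAL, NOT_EQUAL, MORE, MORE_EQUAL, LESS, LESS_EQUAL)
}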

View File

@@ -1,5 +1,10 @@
 package main
 
-func main() {
+import "github.com/e1lama/spl/lexer"
+
+func main() {
+	tokens := lexer.Tokenize(`index=security (action=failed OR action=denied or action=accepted) host!="localhost" | stats count by dest_ip | sort -count`)
+	for _, token := range tokens {
+		lexer.Debug(token)
+	}
+}