From 39867908f7e04ccdf7b44ed67957d1a9bb719415 Mon Sep 17 00:00:00 2001
From: HiveBeats
Date: Sun, 7 Dec 2025 00:32:24 +0700
Subject: [PATCH] initial commit

---
Review note: the contents of .gitignore, lexer/lexer.go and lexer/tokens.go
were revised after export; the blob ids on their "index" lines still refer
to the originally exported contents.

 .gitignore      |  28 +++++++++
 go.mod          |   3 +
 lexer/lexer.go  | 150 ++++++++++++++++++++++++++++++++++++++++++++++++
 lexer/tokens.go |  53 +++++++++++++++++
 main.go         |   5 ++
 5 files changed, 239 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 go.mod
 create mode 100644 lexer/lexer.go
 create mode 100644 lexer/tokens.go
 create mode 100644 main.go

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..062d547
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# Binaries for the current project
+*.exe
+*.dll
+*.so
+*.dylib
+
+# Go build cache
+.cache/go-build/
+
+# IDE-specific files and directories
+.idea/
+.vscode/
+*.iml
+*.ipr
+*.iws
+
+# Operating system specific files
+.DS_Store
+Thumbs.db
+
+# Test coverage output
+*.out
+
+# Dependency directory (delete this entry if you vendor dependencies)
+vendor/
+
+# Log files
+*.log
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..a221834
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/e1lama/spl
+
+go 1.25.1
diff --git a/lexer/lexer.go b/lexer/lexer.go
new file mode 100644
index 0000000..ed68864
--- /dev/null
+++ b/lexer/lexer.go
@@ -0,0 +1,150 @@
+// Package lexer defines the tokens and the pattern table used to split a
+// query string into tokens.
+package lexer
+
+import (
+	//"fmt"
+	"regexp"
+)
+
+// regexPattern pairs a compiled pattern with the handler that consumes its
+// match.
+type regexPattern struct {
+	regex   *regexp.Regexp
+	handler regexHandler
+}
+
+// lexer holds the scanning state for one source string.
+type lexer struct {
+	patterns []regexPattern
+	Tokens   []Token
+	source   string
+	position int // byte offset of the next unread character in source
+}
+
+// lexerPatterns is the ordered pattern table shared by every lexer. It is
+// compiled once at package initialisation rather than on every createLexer
+// call. Each pattern is anchored with ^ so it can only match at the start of
+// the unread input, which is what the handlers rely on when they advance the
+// position by the length of the match. Overlapping patterns are listed most
+// specific first (">=" before ">", float before integer, keywords before
+// symbols).
+var lexerPatterns = []regexPattern{
+	{regexp.MustCompile(`^\s+`), defaultHandler(SPACE, " ")},
+	{regexp.MustCompile(`^:`), defaultHandler(COLON, ":")},
+	{regexp.MustCompile(`^>=`), defaultHandler(MORE_EQUAL, ">=")},
+	{regexp.MustCompile(`^>`), defaultHandler(MORE, ">")},
+	{regexp.MustCompile(`^<=`), defaultHandler(LESS_EQUAL, "<=")},
+	{regexp.MustCompile(`^<`), defaultHandler(LESS, "<")},
+	{regexp.MustCompile(`^!=`), defaultHandler(NOT_EQUAL, "!=")},
+	{regexp.MustCompile(`^=`), defaultHandler(EQUAL, "=")},
+	{regexp.MustCompile(`^!`), defaultHandler(EXCLAMATION, "!")},
+	{regexp.MustCompile(`^(?:and|AND)\b`), defaultHandler(AND, "and")},
+	{regexp.MustCompile(`^(?:not|NOT)\b`), defaultHandler(NOT, "not")},
+	{regexp.MustCompile(`^(?:or|OR)\b`), defaultHandler(OR, "or")},
+	{regexp.MustCompile(`^\(`), defaultHandler(OPEN_BRACE, "(")},
+	{regexp.MustCompile(`^\)`), defaultHandler(CLOSED_BRACE, ")")},
+	{regexp.MustCompile(`^\|`), defaultHandler(PIPE, "|")},
+	{regexp.MustCompile(`^//[^\n]*`), commentHandler},
+	{regexp.MustCompile(`^[-+]?[0-9]*\.[0-9]+`), floatHandler},
+	{regexp.MustCompile(`^[-+]?[0-9]+`), numberHandler},
+	{regexp.MustCompile(`^"[^"]*"`), stringHandler},
+	{regexp.MustCompile(`^\w+`), symbolHandler},
+}
+
+// createLexer returns a lexer positioned at the start of source with an
+// empty token list.
+func createLexer(source string) *lexer {
+	return &lexer{
+		position: 0,
+		source:   source,
+		Tokens:   make([]Token, 0),
+		patterns: lexerPatterns,
+	}
+}
+
+// incrementPosition advances the read position by n bytes.
+func (lex *lexer) incrementPosition(n int) {
+	lex.position += n
+}
+
+// push appends token and advances past it. The position moves by
+// len(token.value), so the value must have the same byte length as the
+// source text it stands for.
+func (lex *lexer) push(token Token) {
+	lex.Tokens = append(lex.Tokens, token)
+	lex.incrementPosition(len(token.value))
+}
+
+// at returns the byte at the current position. It panics at end of input, so
+// check atEof first.
+func (lex *lexer) at() byte {
+	return lex.source[lex.position]
+}
+
+// currentString returns the unread remainder of the source.
+func (lex *lexer) currentString() string {
+	return lex.source[lex.position:]
+}
+
+// atEof reports whether the whole source has been consumed.
+func (lex *lexer) atEof() bool {
+	return lex.position >= len(lex.source)
+}
+
+// regexHandler consumes the text that regex matches in the lexer's unread
+// input, usually by pushing a token.
+type regexHandler func(lex *lexer, regex *regexp.Regexp)
+
+// defaultHandler returns a handler for fixed-text tokens: it ignores the
+// regex and pushes a token with the given type and value.
+func defaultHandler(tokenType TokenType, value string) regexHandler {
+	return func(lex *lexer, regex *regexp.Regexp) {
+		lex.push(Token{tokenType, value})
+	}
+}
+
+// stringHandler pushes a STRING_LITERAL token whose value is the matched
+// text, surrounding double quotes included. It panics if regex does not
+// match the unread input.
+func stringHandler(lex *lexer, regex *regexp.Regexp) {
+	match := regex.FindStringIndex(lex.currentString())
+	stringLiteral := lex.currentString()[match[0]:match[1]]
+
+	lex.push(Token{STRING_LITERAL, stringLiteral})
+}
+
+// numberHandler pushes the matched integer text as a NUMBER token.
+func numberHandler(lex *lexer, regex *regexp.Regexp) {
+	match := regex.FindString(lex.currentString())
+	lex.push(Token{NUMBER, match})
+}
+
+// floatHandler pushes the matched decimal text as a FLOAT_NUMBER token.
+func floatHandler(lex *lexer, regex *regexp.Regexp) {
+	match := regex.FindString(lex.currentString())
+	lex.push(Token{FLOAT_NUMBER, match})
+}
+
+// symbolHandler pushes the matched word as a SYMBOL token.
+func symbolHandler(lex *lexer, regex *regexp.Regexp) {
+	// TODO: if reserved keyword, insert "IDENTIFIER" token
+	match := regex.FindString(lex.currentString())
+	lex.push(Token{SYMBOL, match})
+}
+
+// skipHandler advances past the match without emitting a token. It panics if
+// regex does not match the unread input.
+func skipHandler(lex *lexer, regex *regexp.Regexp) {
+	match := regex.FindStringIndex(lex.currentString())
+	lex.incrementPosition(match[1])
+}
+
+// commentHandler skips a comment without emitting a token. It does nothing
+// when regex does not match.
+func commentHandler(lex *lexer, regex *regexp.Regexp) {
+	match := regex.FindStringIndex(lex.currentString())
+	if match != nil {
+		lex.incrementPosition(match[1])
+	}
+}
diff --git a/lexer/tokens.go b/lexer/tokens.go
new file mode 100644
index 0000000..183c7fb
--- /dev/null
+++ b/lexer/tokens.go
@@ -0,0 +1,53 @@
+package lexer
+
+// What can appear in a query?
+// string literals
+// colons
+// equals sign
+// not-equals sign
+// AND
+// NOT
+// opening parentheses
+// closing parentheses
+// comments
+// pipe characters
+// numbers
+// strings
+
+// TokenType identifies the kind of a lexed token.
+type TokenType int
+
+// Token kinds produced by the lexer.
+const (
+	SYMBOL TokenType = iota
+	COLON
+	EXCLAMATION
+	EQUAL
+	NOT_EQUAL
+	AND
+	NOT
+	OR
+	MORE
+	LESS
+	MORE_EQUAL
+	LESS_EQUAL
+	OPEN_BRACE
+	CLOSED_BRACE
+	COMMENT
+	PIPE
+	NUMBER
+	FLOAT_NUMBER
+	STRING_LITERAL
+	SPACE
+)
+
+// Token is a single lexed unit: its kind and its text value.
+type Token struct {
+	tokenType TokenType
+	value     string
+}
+
+// Parse is a placeholder for the query parser; it currently does nothing.
+func Parse(str string) {
+	// TODO: implement.
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..7905807
--- /dev/null
+++ b/main.go
@@ -0,0 +1,5 @@
+package main
+
+func main() {
+
+}