// // Copyright 2026 The InfiniFlow Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // package cli import ( "strings" "unicode" ) // Lexer performs lexical analysis of the input type Lexer struct { input string pos int readPos int ch byte } // NewLexer creates a new lexer for the given input func NewLexer(input string) *Lexer { l := &Lexer{input: input} l.readChar() return l } func (l *Lexer) readChar() { if l.readPos >= len(l.input) { l.ch = 0 } else { l.ch = l.input[l.readPos] } l.pos = l.readPos l.readPos++ } func (l *Lexer) peekChar() byte { if l.readPos >= len(l.input) { return 0 } return l.input[l.readPos] } func (l *Lexer) peekToken() string { // Skip whitespace starting from readPos skipPos := l.readPos for skipPos < len(l.input) && (l.input[skipPos] == ' ' || l.input[skipPos] == '\t' || l.input[skipPos] == '\n' || l.input[skipPos] == '\r') { skipPos++ } // Read identifier starting from skipPos start := skipPos for skipPos < len(l.input) && (isLetter(l.input[skipPos]) || isDigit(l.input[skipPos]) || l.input[skipPos] == '_' || l.input[skipPos] == '-' || l.input[skipPos] == '.') { skipPos++ } return l.input[start:skipPos] } func (l *Lexer) skipWhitespace() { for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { l.readChar() } } // NextToken returns the next token from the input func (l *Lexer) NextToken() Token { var tok Token l.skipWhitespace() switch l.ch { case ';': tok = newToken(TokenSemicolon, l.ch) l.readChar() case ',': tok = newToken(TokenComma, l.ch) l.readChar() case '/': tok = newToken(TokenSlash, l.ch) l.readChar() case '-': tok = newToken(TokenDash, l.ch) l.readChar() case '[': tok = newToken(TokenLBracket, l.ch) l.readChar() case ']': tok = newToken(TokenRBracket, l.ch) l.readChar() case '\'': tok.Type = TokenQuotedString tok.Value = l.readQuotedString('\'') case '"': tok.Type = TokenQuotedString tok.Value = l.readQuotedString('"') case '\\': // Meta command: backslash followed by command name tok.Type = TokenIdentifier tok.Value = l.readMetaCommand() case 0: tok.Type = TokenEOF tok.Value = "" default: if isLetter(l.ch) || l.ch == '_' { ident := l.readIdentifier() return l.lookupIdent(ident) } else if isDigit(l.ch) { tok.Value, tok.Type = l.readNumber() return tok } tok = newToken(TokenIllegal, l.ch) l.readChar() } return tok } func (l *Lexer) readMetaCommand() string { start := l.pos l.readChar() // consume backslash for isLetter(l.ch) || l.ch == '?' { l.readChar() } return l.input[start:l.pos] } func newToken(tokenType int, ch byte) Token { return Token{Type: tokenType, Value: string(ch)} } func (l *Lexer) readIdentifier() string { start := l.pos for isLetter(l.ch) || isDigit(l.ch) || l.ch == '_' || l.ch == '-' || l.ch == '.' { l.readChar() } return l.input[start:l.pos] } func (l *Lexer) readNumber() (string, int) { start := l.pos tokenType := TokenInteger // Read integer part for isDigit(l.ch) { l.readChar() } // If encountering a decimal point followed by a digit, read as float if l.ch == '.' && isDigit(l.peekChar()) { tokenType = TokenFloat l.readChar() // Consume '.' for isDigit(l.ch) { l.readChar() } } return l.input[start:l.pos], tokenType } func (l *Lexer) readQuotedString(quote byte) string { l.readChar() // skip opening quote start := l.pos for l.ch != quote && l.ch != 0 { l.readChar() } str := l.input[start:l.pos] if l.ch == quote { l.readChar() // skip closing quote } return str } func (l *Lexer) lookupIdent(ident string) Token { upper := strings.ToUpper(ident) switch upper { case "LOGIN": return Token{Type: TokenLogin, Value: ident} case "LOGOUT": return Token{Type: TokenLogout, Value: ident} case "REGISTER": return Token{Type: TokenRegister, Value: ident} case "LIST": return Token{Type: TokenList, Value: ident} case "SERVICES": return Token{Type: TokenServices, Value: ident} case "SHOW": return Token{Type: TokenShow, Value: ident} case "CREATE": return Token{Type: TokenCreate, Value: ident} case "SERVICE": return Token{Type: TokenService, Value: ident} case "SHUTDOWN": return Token{Type: TokenShutdown, Value: ident} case "RESTART": return Token{Type: TokenRestart, Value: ident} case "USERS": return Token{Type: TokenUsers, Value: ident} case "DROP": return Token{Type: TokenDrop, Value: ident} case "USER": return Token{Type: TokenUser, Value: ident} case "ALTER": return Token{Type: TokenAlter, Value: ident} case "ACTIVE": return Token{Type: TokenActive, Value: ident} case "INACTIVE": return Token{Type: TokenInactive, Value: ident} case "ADMIN": return Token{Type: TokenAdmin, Value: ident} case "SERVER": return Token{Type: TokenServer, Value: ident} case "API": return Token{Type: TokenAPI, Value: ident} case "ADD": return Token{Type: TokenAdd, Value: ident} case "HOST": return Token{Type: TokenHost, Value: ident} case "DELETE": return Token{Type: TokenDelete, Value: ident} case "PASSWORD": return Token{Type: TokenPassword, Value: ident} case "DATASET": return Token{Type: TokenDataset, Value: ident} case "DATASETS": return Token{Type: TokenDatasets, Value: ident} case "OF": return Token{Type: TokenOf, Value: ident} case "AGENTS": return Token{Type: TokenAgents, Value: ident} case "SEARCHES": return Token{Type: TokenSearches, Value: ident} case "ROLE": return Token{Type: TokenRole, Value: ident} case "ROLES": return Token{Type: TokenRoles, Value: ident} case "DESCRIPTION": return Token{Type: TokenDescription, Value: ident} case "GRANT": return Token{Type: TokenGrant, Value: ident} case "REVOKE": return Token{Type: TokenRevoke, Value: ident} case "ALL": return Token{Type: TokenAll, Value: ident} case "PERMISSION": return Token{Type: TokenPermission, Value: ident} case "TO": return Token{Type: TokenTo, Value: ident} case "FROM": return Token{Type: TokenFrom, Value: ident} case "FOR": return Token{Type: TokenFor, Value: ident} case "RESOURCES": return Token{Type: TokenResources, Value: ident} case "ON": return Token{Type: TokenOn, Value: ident} case "SET": return Token{Type: TokenSet, Value: ident} case "UNSET": return Token{Type: TokenUnset, Value: ident} case "RESET": return Token{Type: TokenReset, Value: ident} case "VERSION": return Token{Type: TokenVersion, Value: ident} case "VAR": return Token{Type: TokenVar, Value: ident} case "VARS": return Token{Type: TokenVars, Value: ident} case "CONFIGS": return Token{Type: TokenConfigs, Value: ident} case "CONFIG": return Token{Type: TokenConfig, Value: ident} case "ENVS": return Token{Type: TokenEnvs, Value: ident} case "KEY": return Token{Type: TokenKey, Value: ident} case "KEYS": return Token{Type: TokenKeys, Value: ident} case "GENERATE": return Token{Type: TokenGenerate, Value: ident} case "MODEL": return Token{Type: TokenModel, Value: ident} case "MODELS": return Token{Type: TokenModels, Value: ident} case "PROVIDER": return Token{Type: TokenProvider, Value: ident} case "PROVIDERS": return Token{Type: TokenProviders, Value: ident} case "DEFAULT": return Token{Type: TokenDefault, Value: ident} case "CHATS": return Token{Type: TokenChats, Value: ident} case "CHAT": return Token{Type: TokenChat, Value: ident} case "OPENAI_CHAT": return Token{Type: TokenOpenaiChat, Value: ident} case "MESSAGE": return Token{Type: TokenMessage, Value: ident} case "IMAGE": return Token{Type: TokenImage, Value: ident} case "VIDEO": return Token{Type: TokenVideo, Value: ident} case "AUDIO": return Token{Type: TokenAudio, Value: ident} case "THINK": return Token{Type: TokenThink, Value: ident} case "EFFORT": return Token{Type: TokenEffort, Value: ident} case "VERBOSITY": return Token{Type: TokenVerbosity, Value: ident} case "NONE": return Token{Type: TokenNone, Value: ident} case "MINIMAL": return Token{Type: TokenMinimal, Value: ident} case "LOW": return Token{Type: TokenLow, Value: ident} case "MEDIUM": return Token{Type: TokenMedium, Value: ident} case "HIGH": return Token{Type: TokenHigh, Value: ident} case "MAX": return Token{Type: TokenMax, Value: ident} case "STORE": return Token{Type: TokenStore, Value: ident} case "STREAM": return Token{Type: TokenStream, Value: ident} case "LS": return Token{Type: TokenLS, Value: ident} case "CAT": return Token{Type: TokenCat, Value: ident} case "FILES": return Token{Type: TokenFiles, Value: ident} case "AS": return Token{Type: TokenAs, Value: ident} case "PARSE": return Token{Type: TokenParse, Value: ident} case "IMPORT": return Token{Type: TokenImport, Value: ident} case "INTO": return Token{Type: TokenInto, Value: ident} case "WITH": return Token{Type: TokenWith, Value: ident} case "PARSER": return Token{Type: TokenParser, Value: ident} case "PIPELINE": return Token{Type: TokenPipeline, Value: ident} case "GET": return Token{Type: TokenGet, Value: ident} case "SEARCH": return Token{Type: TokenSearch, Value: ident} case "AGENT": return Token{Type: TokenAgent, Value: ident} case "MEMORY": return Token{Type: TokenMemory, Value: ident} case "MEMORIES": return Token{Type: TokenMemories, Value: ident} case "RETRIEVE": return Token{Type: TokenRetrieve, Value: ident} case "CURRENT": return Token{Type: TokenCurrent, Value: ident} case "FINGERPRINT": return Token{Type: TokenFingerprint, Value: ident} case "LICENSE": return Token{Type: TokenLicense, Value: ident} case "VISION": return Token{Type: TokenVision, Value: ident} case "EMBEDDING": return Token{Type: TokenEmbedding, Value: ident} case "RERANK": return Token{Type: TokenRerank, Value: ident} case "ASR": return Token{Type: TokenASR, Value: ident} case "TTS": return Token{Type: TokenTTS, Value: ident} case "EMBED": return Token{Type: TokenEmbed, Value: ident} case "TEXT": return Token{Type: TokenText, Value: ident} case "QUERY": return Token{Type: TokenQuery, Value: ident} case "TOP": return Token{Type: TokenTop, Value: ident} case "DIMENSION": return Token{Type: TokenDimension, Value: ident} case "OCR": return Token{Type: TokenOCR, Value: ident} case "DOC_PARSE": return Token{Type: TokenDocParse, Value: ident} case "ASYNC": return Token{Type: TokenAsync, Value: ident} case "SYNC": return Token{Type: TokenSync, Value: ident} case "BENCHMARK": return Token{Type: TokenBenchmark, Value: ident} case "PING": return Token{Type: TokenPing, Value: ident} case "TOKEN": return Token{Type: TokenToken, Value: ident} case "TOKENS": return Token{Type: TokenTokens, Value: ident} case "INDEX": return Token{Type: TokenIndex, Value: ident} case "VECTOR": return Token{Type: TokenVector, Value: ident} case "SIZE": return Token{Type: TokenSize, Value: ident} case "METADATA": return Token{Type: TokenMetadata, Value: ident} case "TABLE": return Token{Type: TokenTable, Value: ident} case "AVAILABLE": return Token{Type: TokenAvailable, Value: ident} case "SUPPORTED": return Token{Type: TokenSupported, Value: ident} case "NAME": return Token{Type: TokenName, Value: ident} case "BALANCE": return Token{Type: TokenBalance, Value: ident} case "INSTANCE": return Token{Type: TokenInstance, Value: ident} case "INSTANCES": return Token{Type: TokenInstances, Value: ident} case "DISABLE": return Token{Type: TokenDisable, Value: ident} case "ENABLE": return Token{Type: TokenEnable, Value: ident} case "INSERT": return Token{Type: TokenInsert, Value: ident} case "FILE": return Token{Type: TokenFile, Value: ident} case "USE": return Token{Type: TokenUse, Value: ident} case "CHECK": return Token{Type: TokenCheck, Value: ident} case "UPDATE": return Token{Type: TokenUpdate, Value: ident} case "REMOVE": return Token{Type: TokenRemove, Value: ident} case "EXPLAIN": return Token{Type: TokenExplain, Value: ident} case "CHUNK": // Check if followed by STORE for compound token if strings.ToUpper(l.peekToken()) == "STORE" { // Skip whitespace to STORE for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { l.readChar() } // Skip past STORE for isLetter(l.ch) || isDigit(l.ch) || l.ch == '_' || l.ch == '-' || l.ch == '.' { l.readChar() } return Token{Type: TokenChunkStore, Value: "CHUNK STORE"} } return Token{Type: TokenChunk, Value: ident} case "CHUNKS": return Token{Type: TokenChunks, Value: ident} case "DOCUMENT": return Token{Type: TokenDocument, Value: ident} case "DOCUMENTS": return Token{Type: TokenDocuments, Value: ident} case "TAGS": return Token{Type: TokenTag, Value: ident} case "REGION": return Token{Type: TokenRegion, Value: ident} case "URL": return Token{Type: TokenURL, Value: ident} case "TASK": return Token{Type: TokenTask, Value: ident} case "TASKS": return Token{Type: TokenTasks, Value: ident} case "START": return Token{Type: TokenStart, Value: ident} case "STOP": return Token{Type: TokenStop, Value: ident} case "INGESTOR": return Token{Type: TokenIngestor, Value: ident} case "INGESTORS": return Token{Type: TokenIngestors, Value: ident} case "INGESTION": return Token{Type: TokenIngestion, Value: ident} case "MQ": return Token{Type: TokenMQ, Value: ident} case "PUBLISH": return Token{Type: TokenPublish, Value: ident} case "PULL": return Token{Type: TokenPull, Value: ident} case "PENDING": return Token{Type: TokenPending, Value: ident} case "NOACK": return Token{Type: TokenNoACK, Value: ident} case "ANALYZE": return Token{Type: TokenAnalyze, Value: ident} case "SUMMARY": return Token{Type: TokenSummary, Value: ident} case "STORAGE": return Token{Type: TokenStorage, Value: ident} case "QUOTA": return Token{Type: TokenQuota, Value: ident} case "TREE": return Token{Type: TokenTree, Value: ident} case "ORPHAN": return Token{Type: TokenOrphan, Value: ident} case "DAYS": return Token{Type: TokenDays, Value: ident} case "WINDOW": return Token{Type: TokenWindow, Value: ident} case "ACTIVITY": return Token{Type: TokenActivity, Value: ident} case "PURGE": return Token{Type: TokenPurge, Value: ident} case "PREVIEW": return Token{Type: TokenPreview, Value: ident} case "PLAN": return Token{Type: TokenPlan, Value: ident} case "DATA": return Token{Type: TokenData, Value: ident} case "LOG": return Token{Type: TokenLog, Value: ident} case "LEVEL": return Token{Type: TokenLevel, Value: ident} case "DEBUG": return Token{Type: TokenDebug, Value: ident} case "INFO": return Token{Type: TokenInfo, Value: ident} case "IN": return Token{Type: TokenIn, Value: ident} case "WARN": return Token{Type: TokenWarn, Value: ident} case "ERROR": return Token{Type: TokenError, Value: ident} case "FATAL": return Token{Type: TokenFatal, Value: ident} case "PANIC": return Token{Type: TokenPanic, Value: ident} case "PARAM": return Token{Type: TokenParam, Value: ident} case "PLAY": return Token{Type: TokenPlay, Value: ident} case "FORMAT": return Token{Type: TokenFormat, Value: ident} case "SAVE": return Token{Type: TokenSave, Value: ident} default: return Token{Type: TokenIdentifier, Value: ident} } } func isLetter(ch byte) bool { return unicode.IsLetter(rune(ch)) } func isDigit(ch byte) bool { return unicode.IsDigit(rune(ch)) }