diff --git a/internal/cli/lexer.go b/internal/cli/lexer.go
index 8e682df547..11b4b8c013 100644
--- a/internal/cli/lexer.go
+++ b/internal/cli/lexer.go
@@ -301,6 +301,14 @@ func (l *Lexer) lookupIdent(ident string) Token {
 		return Token{Type: TokenChats, Value: ident}
 	case "CHAT":
 		return Token{Type: TokenChat, Value: ident}
+	case "MESSAGE":
+		return Token{Type: TokenMessage, Value: ident}
+	case "IMAGE":
+		return Token{Type: TokenImage, Value: ident}
+	case "VIDEO":
+		return Token{Type: TokenVideo, Value: ident}
+	case "AUDIO":
+		return Token{Type: TokenAudio, Value: ident}
 	case "THINK":
 		return Token{Type: TokenThink, Value: ident}
 	case "EFFORT":
diff --git a/internal/cli/types.go b/internal/cli/types.go
index b6032cd11c..25490797d9 100644
--- a/internal/cli/types.go
+++ b/internal/cli/types.go
@@ -81,6 +81,10 @@ const (
 	TokenDefault
 	TokenChats
 	TokenChat
+	TokenMessage
+	TokenImage
+	TokenVideo
+	TokenAudio
 	TokenStream
 	TokenFiles
 	TokenAs
@@ -109,7 +113,6 @@ const (
 	TokenVector
 	TokenSize
 	TokenName // For ALTER PROVIDER NAME
-	TokenPool
 	TokenBalance
 	TokenInstance
 	TokenInstances
diff --git a/internal/cli/user_command.go b/internal/cli/user_command.go
index 2ca0fcca19..5d87b2f643 100644
--- a/internal/cli/user_command.go
+++ b/internal/cli/user_command.go
@@ -19,8 +19,10 @@ package cli
 import (
 	"bufio"
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	neturl "net/url"
 	"os"
 	ce "ragflow/internal/cli/filesystem"
 	"strings"
@@ -1514,6 +1516,16 @@ func (c *RAGFlowClient) EnableOrDisableModel(cmd *Command, status string) (Respo
 	return &result, nil
 }
 
+// isValidURL reports whether str parses as a URL that has both a scheme and a
+// host; anything else (such as a local file path) is treated as not a URL.
+func isValidURL(str string) bool {
+	u, err := neturl.Parse(str)
+	if err != nil {
+		return false
+	}
+	return u.Scheme != "" && u.Host != ""
+}
+
 func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
 	if c.ServerType != "user" {
 		return nil, fmt.Errorf("this command is only allowed in USER mode")
@@ -1539,7 +1551,102 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
 		return nil, fmt.Errorf("model name not provided and no current model set. Use 'use model' command first")
 	}
 
-	message := cmd.Params["message"].(string)
+	formattedMessages := []map[string]interface{}{}
+
+	messages, ok := cmd.Params["messages"].([]string)
+	if !ok {
+		return nil, fmt.Errorf("messages not provided")
+	}
+	contents := []map[string]interface{}{}
+	if len(messages) > 0 {
+		for _, message := range messages {
+			contents = append(contents, map[string]interface{}{
+				"type": "text",
+				"text": message,
+			})
+		}
+
+	}
+
+	images, ok := cmd.Params["images"].([]string)
+	if !ok {
+		return nil, fmt.Errorf("images not provided")
+	}
+	if len(images) > 0 {
+		for _, image := range images {
+			if isValidURL(image) {
+				contents = append(contents, map[string]interface{}{
+					"type": "image_url",
+					"image_url": map[string]string{
+						"url": image,
+					},
+				})
+			} else {
+				// image is a path, read the file and turn it into base64
+				imageContent, err := os.ReadFile(image)
+				if err != nil {
+					return nil, fmt.Errorf("failed to read image: %w", err)
+				}
+				contents = append(contents, map[string]interface{}{
+					"type": "image_file",
+					"image_file": map[string]interface{}{
+						"content": base64.StdEncoding.EncodeToString(imageContent),
+					},
+				})
+			}
+		}
+	}
+
+	videos, ok := cmd.Params["videos"].([]string)
+	if !ok {
+		return nil, fmt.Errorf("videos not provided")
+	}
+	if len(videos) > 0 {
+		for _, video := range videos {
+			if isValidURL(video) {
+				contents = append(contents, map[string]interface{}{
+					"type": "video_url",
+					"video_url": map[string]interface{}{
+						"url": video,
+					},
+				})
+			} else {
+				return nil, fmt.Errorf("invalid video URL: %s", video)
+			}
+		}
+	}
+
+	//audios, ok := cmd.Params["audios"].([]string)
+	//if !ok {
+	//	return nil, fmt.Errorf("audios not provided")
+	//}
+
+	files, ok := cmd.Params["files"].([]string)
+	if !ok {
+		return nil, fmt.Errorf("files not provided")
+	}
+
+	if len(files) > 0 {
+		for _, file := range files {
+			if isValidURL(file) {
+				contents = append(contents, map[string]interface{}{
+					"type": "file_url",
+					"file_url": map[string]interface{}{
+						"url": file,
+					},
+				})
+			} else {
+				return nil, fmt.Errorf("invalid file URL: %s", file)
+			}
+		}
+	}
+
+	formattedText := map[string]interface{}{
+		"role":    "user",
+		"content": contents,
+	}
+	formattedMessages = append(formattedMessages, formattedText)
+
 	thinking := cmd.Params["thinking"].(bool)
 	stream := cmd.Params["stream"].(bool)
 	effort := cmd.Params["effort"].(string)
@@ -1547,26 +1654,26 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
 
 	url := "/chat/completions"
 
-	message = strings.TrimSpace(message)
-	var content interface{} = message
-	if strings.HasPrefix(message, "[") && strings.HasSuffix(message, "]") {
-		var parts []map[string]interface{}
-		if err := json.Unmarshal([]byte(message), &parts); err == nil {
-			content = parts
-		}
-	}
-	formattedMessage := []map[string]interface{}{
-		{
-			"role":    "user",
-			"content": content,
-		},
-	}
+	//message = strings.TrimSpace(message)
+	//var content interface{} = message
+	//if strings.HasPrefix(message, "[") && strings.HasSuffix(message, "]") {
+	//	var parts []map[string]interface{}
+	//	if err := json.Unmarshal([]byte(message), &parts); err == nil {
+	//		content = parts
+	//	}
+	//}
+	//formattedMessage := []map[string]interface{}{
+	//	{
+	//		"role":    "user",
+	//		"content": content,
+	//	},
+	//}
 
 	payload := map[string]interface{}{
 		"provider_name": providerName,
 		"instance_name": instanceName,
 		"model_name":    modelName,
-		"messages":      formattedMessage,
+		"messages":      formattedMessages,
 		"stream":        stream,
 		"thinking":      thinking,
 	}
diff --git a/internal/cli/user_parser.go b/internal/cli/user_parser.go
index 5496cc3aa1..0e2a5fed54 100644
--- a/internal/cli/user_parser.go
+++ b/internal/cli/user_parser.go
@@ -2412,102 +2412,179 @@ func (p *Parser) parseDisableCommand() (*Command, error) {
 	return cmd, nil
 }
 
+// parseChatCommand parses a CHAT statement. After CHAT it accepts the clauses
+// WITH, MESSAGE, IMAGE, VIDEO, AUDIO, FILE, EFFORT and VERBOSITY in any order
+// until a semicolon or an unrecognized token is reached, e.g.:
+//
+//	CHAT WITH 'model@instance@provider' MESSAGE 'hello world' 'who are you' IMAGE 'url1' 'file0' VIDEO "url2.mov" "file1" FILE "url" "path file2" AUDIO "file.wav"
 func (p *Parser) parseChatCommand() (*Command, error) {
 	p.nextToken() // consume CHAT
 
-	var compositeModelName string
-	var message string
-
-	// Check if we have a quoted string that looks like a model identifier (contains two slashes)
-	// Format: 'model@instance@provider' or just 'message'
-	if p.curToken.Type == TokenQuotedString {
-		firstArg := p.curToken.Value
-
-		// Check if it looks like a model identifier (contains exactly 2 slashes)
-		slashCount := strings.Count(firstArg, "@")
-		if slashCount == 2 {
-			// This is likely a model identifier, expect another quoted string for message
-			compositeModelName = firstArg
-			p.nextToken()
-
-			// After model name, expect message
-			if p.curToken.Type != TokenQuotedString {
-				return nil, fmt.Errorf("expected message after model name")
-			}
-			message = p.curToken.Value
-			p.nextToken()
-		} else {
-			// This is just a message, use current model
-			message = firstArg
-			p.nextToken()
-		}
-	} else if p.curToken.Type == TokenIdentifier {
-		// Context engine style: chat
-		message = p.curToken.Value
-		p.nextToken()
-	} else {
-		return nil, fmt.Errorf("expected model name (quoted string) or message")
-	}
-
-	cmd := NewCommand("chat_to_model")
-
+	var err error
+	var compositeModelName string = ""
+	var messages []string
+	var images []string
+	var videos []string
+	var audios []string
+	var files []string
 	effort := "default"
 	verbosity := "low"
-	if p.curToken.Type == TokenWith {
-		p.nextToken() // pass WITH
+
+optionsLoop:
+	for {
 		switch p.curToken.Type {
+		case TokenWith:
+			p.nextToken()
+			// 'model@instance@provider'
+			if compositeModelName != "" {
+				return nil, fmt.Errorf("model name is already set")
+			}
+			compositeModelName, err = p.parseQuotedString()
+			if err != nil {
+				return nil, err
+			}
+			p.nextToken()
+		case TokenMessage:
+			p.nextToken()
+			if len(messages) != 0 {
+				return nil, fmt.Errorf("message is already set")
+			}
+		messageLoop:
+			for {
+				if p.curToken.Type != TokenQuotedString {
+					break messageLoop
+				}
+				var message string
+				message, err = p.parseQuotedString()
+				if err != nil {
+					return nil, err
+				}
+				message = strings.TrimSpace(message)
+				messages = append(messages, message)
+				p.nextToken()
+			}
+		case TokenImage:
+			p.nextToken()
+			if len(images) != 0 {
+				return nil, fmt.Errorf("image is already set")
+			}
+		imageLoop:
+			for {
+				if p.curToken.Type != TokenQuotedString {
+					break imageLoop
+				}
+				var image string
+				image, err = p.parseQuotedString()
+				if err != nil {
+					return nil, err
+				}
+				images = append(images, image)
+				p.nextToken()
+			}
+		case TokenVideo:
+			p.nextToken()
+			if len(videos) != 0 {
+				return nil, fmt.Errorf("video is already set")
+			}
+		videoLoop:
+			for {
+				if p.curToken.Type != TokenQuotedString {
+					break videoLoop
+				}
+				var video string
+				video, err = p.parseQuotedString()
+				if err != nil {
+					return nil, err
+				}
+				videos = append(videos, video)
+				p.nextToken()
+			}
+		case TokenAudio:
+			p.nextToken()
+			if len(audios) != 0 {
+				return nil, fmt.Errorf("audio is already set")
+			}
+		audioLoop:
+			for {
+				if p.curToken.Type != TokenQuotedString {
+					break audioLoop
+				}
+				var audio string
+				audio, err = p.parseQuotedString()
+				if err != nil {
+					return nil, err
+				}
+				audios = append(audios, audio)
+				p.nextToken()
+			}
+		case TokenFile:
+			p.nextToken()
+			if len(files) != 0 {
+				return nil, fmt.Errorf("file is already set")
+			}
+		fileLoop:
+			for {
+				if p.curToken.Type != TokenQuotedString {
+					break fileLoop
+				}
+				var file string
+				file, err = p.parseQuotedString()
+				if err != nil {
+					return nil, err
+				}
+				files = append(files, file)
+				p.nextToken()
+			}
 		case TokenEffort:
-			{
-				p.nextToken() // pass Effort
-				switch p.curToken.Type {
-				case TokenNone:
-					effort = "none"
-				case TokenMinimal:
-					effort = "minimal"
-				case TokenLow:
-					effort = "low"
-				case TokenMedium:
-					effort = "medium"
-				case TokenHigh:
-					effort = "high"
-				case TokenMax:
-					effort = "max"
-				default:
-					return nil, fmt.Errorf("invalid effort level")
-				}
-				p.nextToken()
-				break
+			p.nextToken() // pass Effort
+			switch p.curToken.Type {
+			case TokenNone:
+				effort = "none"
+			case TokenMinimal:
+				effort = "minimal"
+			case TokenLow:
+				effort = "low"
+			case TokenMedium:
+				effort = "medium"
+			case TokenHigh:
+				effort = "high"
+			case TokenMax:
+				effort = "max"
+			default:
+				return nil, fmt.Errorf("invalid effort level")
 			}
+			// Consume the level and keep looping so further clauses or ';' are still parsed.
+			p.nextToken()
 		case TokenVerbosity:
-			{
-				p.nextToken() // pass VERBOSITY
-				switch p.curToken.Type {
-				case TokenLow:
-					verbosity = "low"
-				case TokenMedium:
-					verbosity = "median"
-				case TokenHigh:
-					verbosity = "high"
-				default:
-					return nil, fmt.Errorf("invalid verbosity level")
-				}
-				p.nextToken()
-				break
+			p.nextToken() // pass VERBOSITY
+			switch p.curToken.Type {
+			case TokenLow:
+				verbosity = "low"
+			case TokenMedium:
+				verbosity = "median"
+			case TokenHigh:
+				verbosity = "high"
+			default:
+				return nil, fmt.Errorf("invalid verbosity level")
 			}
+			// Consume the level and keep looping so further clauses or ';' are still parsed.
+			p.nextToken()
+		case TokenSemicolon:
+			p.nextToken()
+			break optionsLoop // done
 		default:
-			return nil, fmt.Errorf("expected VERBOSITY or EFFORT")
+			// No more options to process
+			break optionsLoop
 		}
 	}
+	cmd := NewCommand("chat_to_model")
 
-	// Semicolon is optional
-	if p.curToken.Type == TokenSemicolon {
-		p.nextToken()
-	}
-
-	if compositeModelName != "" {
-		cmd.Params["composite_model_name"] = compositeModelName
-	}
-	cmd.Params["message"] = message
+	cmd.Params["composite_model_name"] = compositeModelName
+	cmd.Params["messages"] = messages
+	cmd.Params["images"] = images
+	cmd.Params["videos"] = videos
+	cmd.Params["audios"] = audios
+	cmd.Params["files"] = files
 	cmd.Params["thinking"] = false
 	cmd.Params["stream"] = false
 	cmd.Params["effort"] = effort