mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Go: CLI chat with text, image, video (#14573)
### What problem does this PR solve? ``` RAGFlow(user)> chat with 'glm-4.6v-flash@test@zhipu-ai' message 'What are the pics talk about?' image 'https://cdn.bigmodel.cn/static/logo/register.png' 'https://cdn.bigmodel.cn/static/logo/api-key.png' Answer: The first picture shows a login/register modal with options for phone number login, account login, and WeChat QR code login, along with a prompt for new users to get a 20 million tokens experience package. The second picture displays the API keys management page of a platform, including a warning about API key security and a table listing existing API keys with details like creation time and usage history. Time: 31.600545 RAGFlow(user)> chat with 'glm-4.6v-flash@test@zhipu-ai' message 'What are the video talk about?' video 'https://cdn.bigmodel.cn/agent-demos/lark/113123.mov' Answer: Based on the sequence of frames provided, the video is a demonstration of a web search and navigation process. 1. The video starts with a blank Google search page. 2. The user types "智谱" (which is the Chinese name for the company Zhipu AI) into the search box. 3. The search is initiated and the page shows "About 0 results". 4. The search results load, showing information about Zhipu AI, including its website. 5. The user clicks on the main website link (www.zhipuai.cn). 6. The video ends by showing the homepage of Zhipu AI's website, titled "Z.ai GLM Large Model Open Platform". In summary, the video is about searching for the company "智谱" (Zhipu AI) on Google and then navigating to its official website. Time: 76.582520 ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@@ -301,6 +301,14 @@ func (l *Lexer) lookupIdent(ident string) Token {
|
||||
return Token{Type: TokenChats, Value: ident}
|
||||
case "CHAT":
|
||||
return Token{Type: TokenChat, Value: ident}
|
||||
case "MESSAGE":
|
||||
return Token{Type: TokenMessage, Value: ident}
|
||||
case "IMAGE":
|
||||
return Token{Type: TokenImage, Value: ident}
|
||||
case "VIDEO":
|
||||
return Token{Type: TokenVideo, Value: ident}
|
||||
case "AUDIO":
|
||||
return Token{Type: TokenAudio, Value: ident}
|
||||
case "THINK":
|
||||
return Token{Type: TokenThink, Value: ident}
|
||||
case "EFFORT":
|
||||
|
||||
@@ -81,6 +81,10 @@ const (
|
||||
TokenDefault
|
||||
TokenChats
|
||||
TokenChat
|
||||
TokenMessage
|
||||
TokenImage
|
||||
TokenVideo
|
||||
TokenAudio
|
||||
TokenStream
|
||||
TokenFiles
|
||||
TokenAs
|
||||
@@ -109,7 +113,6 @@ const (
|
||||
TokenVector
|
||||
TokenSize
|
||||
TokenName // For ALTER PROVIDER <name> NAME <new_name>
|
||||
TokenPool
|
||||
TokenBalance
|
||||
TokenInstance
|
||||
TokenInstances
|
||||
|
||||
@@ -19,8 +19,10 @@ package cli
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
netUrl "net/url"
|
||||
"os"
|
||||
ce "ragflow/internal/cli/filesystem"
|
||||
"strings"
|
||||
@@ -1514,6 +1516,14 @@ func (c *RAGFlowClient) EnableOrDisableModel(cmd *Command, status string) (Respo
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
// isValidURL reports whether str parses as a URL that carries both a
// scheme and a host. Strings that fail to parse, as well as bare paths,
// scheme-less references and host-less forms, yield false.
func isValidURL(str string) bool {
	parsed, parseErr := netUrl.Parse(str)
	if parseErr == nil && parsed.Scheme != "" {
		// A scheme alone is not enough; a host must be present as well.
		return parsed.Host != ""
	}
	return false
}
|
||||
|
||||
func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
|
||||
if c.ServerType != "user" {
|
||||
return nil, fmt.Errorf("this command is only allowed in USER mode")
|
||||
@@ -1539,7 +1549,102 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
|
||||
return nil, fmt.Errorf("model name not provided and no current model set. Use 'use model' command first")
|
||||
}
|
||||
|
||||
message := cmd.Params["message"].(string)
|
||||
formattedMessages := []map[string]interface{}{}
|
||||
|
||||
messages, ok := cmd.Params["messages"].([]string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("messages not provided")
|
||||
}
|
||||
contents := []map[string]interface{}{}
|
||||
if len(messages) > 0 {
|
||||
for _, message := range messages {
|
||||
contents = append(contents, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": message,
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
images, ok := cmd.Params["images"].([]string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("images not provided")
|
||||
}
|
||||
if len(images) > 0 {
|
||||
for _, image := range images {
|
||||
if isValidURL(image) {
|
||||
contents = append(contents, map[string]interface{}{
|
||||
"type": "image_url",
|
||||
"image_url": map[string]string{
|
||||
"url": image,
|
||||
},
|
||||
})
|
||||
} else {
|
||||
// image is a path, read the file and turn it into base64
|
||||
imageContent, err := os.ReadFile(image)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read image: %w", err)
|
||||
}
|
||||
contents = append(contents, map[string]interface{}{
|
||||
"type": "image_file",
|
||||
"image_file": map[string]interface{}{
|
||||
"content": base64.StdEncoding.EncodeToString(imageContent),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
videos, ok := cmd.Params["videos"].([]string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("images not provided")
|
||||
}
|
||||
if len(videos) > 0 {
|
||||
for _, video := range videos {
|
||||
if isValidURL(video) {
|
||||
contents = append(contents, map[string]interface{}{
|
||||
"type": "video_url",
|
||||
"video_url": map[string]interface{}{
|
||||
"url": video,
|
||||
},
|
||||
})
|
||||
} else {
|
||||
return nil, fmt.Errorf("invalid video URL: %s", video)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//audios, ok := cmd.Params["audios"].([]string)
|
||||
//if !ok {
|
||||
// return nil, fmt.Errorf("images not provided")
|
||||
//}
|
||||
|
||||
files, ok := cmd.Params["files"].([]string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("images not provided")
|
||||
}
|
||||
|
||||
if len(files) > 0 {
|
||||
for _, file := range files {
|
||||
if isValidURL(file) {
|
||||
contents = append(contents, map[string]interface{}{
|
||||
"type": "file_url",
|
||||
"file_url": map[string]interface{}{
|
||||
"url": file,
|
||||
},
|
||||
})
|
||||
} else {
|
||||
return nil, fmt.Errorf("invalid file URL: %s", file)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
formattedText := map[string]interface{}{
|
||||
"role": "user",
|
||||
"content": contents,
|
||||
}
|
||||
formattedMessages = append(formattedMessages, formattedText)
|
||||
|
||||
thinking := cmd.Params["thinking"].(bool)
|
||||
stream := cmd.Params["stream"].(bool)
|
||||
effort := cmd.Params["effort"].(string)
|
||||
@@ -1547,26 +1652,26 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
|
||||
|
||||
url := "/chat/completions"
|
||||
|
||||
message = strings.TrimSpace(message)
|
||||
var content interface{} = message
|
||||
if strings.HasPrefix(message, "[") && strings.HasSuffix(message, "]") {
|
||||
var parts []map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(message), &parts); err == nil {
|
||||
content = parts
|
||||
}
|
||||
}
|
||||
formattedMessage := []map[string]interface{}{
|
||||
{
|
||||
"role": "user",
|
||||
"content": content,
|
||||
},
|
||||
}
|
||||
//message = strings.TrimSpace(message)
|
||||
//var content interface{} = message
|
||||
//if strings.HasPrefix(message, "[") && strings.HasSuffix(message, "]") {
|
||||
// var parts []map[string]interface{}
|
||||
// if err := json.Unmarshal([]byte(message), &parts); err == nil {
|
||||
// content = parts
|
||||
// }
|
||||
//}
|
||||
//formattedMessage := []map[string]interface{}{
|
||||
// {
|
||||
// "role": "user",
|
||||
// "content": content,
|
||||
// },
|
||||
//}
|
||||
|
||||
payload := map[string]interface{}{
|
||||
"provider_name": providerName,
|
||||
"instance_name": instanceName,
|
||||
"model_name": modelName,
|
||||
"messages": formattedMessage,
|
||||
"messages": formattedMessages,
|
||||
"stream": stream,
|
||||
"thinking": thinking,
|
||||
}
|
||||
|
||||
@@ -2412,102 +2412,176 @@ func (p *Parser) parseDisableCommand() (*Command, error) {
|
||||
return cmd, nil
|
||||
}
|
||||
|
||||
// CHAT 'model@instance@provider' 'hello world'
|
||||
// CHAT WITH 'model@instance@provider' MESSAGE 'hello world' 'who are you' IMAGE 'url1' 'file0' VIDEO "url2.mov" "file1" FILE "url" "path file2" AUDIO "file.wav"
|
||||
func (p *Parser) parseChatCommand() (*Command, error) {
|
||||
p.nextToken() // consume CHAT
|
||||
|
||||
var compositeModelName string
|
||||
var message string
|
||||
|
||||
// Check if we have a quoted string that looks like a model identifier (contains two '@' separators)
|
||||
// Format: 'model@instance@provider' or just 'message'
|
||||
if p.curToken.Type == TokenQuotedString {
|
||||
firstArg := p.curToken.Value
|
||||
|
||||
// Check if it looks like a model identifier (contains exactly 2 '@' separators)
|
||||
slashCount := strings.Count(firstArg, "@")
|
||||
if slashCount == 2 {
|
||||
// This is likely a model identifier, expect another quoted string for message
|
||||
compositeModelName = firstArg
|
||||
p.nextToken()
|
||||
|
||||
// After model name, expect message
|
||||
if p.curToken.Type != TokenQuotedString {
|
||||
return nil, fmt.Errorf("expected message after model name")
|
||||
}
|
||||
message = p.curToken.Value
|
||||
p.nextToken()
|
||||
} else {
|
||||
// This is just a message, use current model
|
||||
message = firstArg
|
||||
p.nextToken()
|
||||
}
|
||||
} else if p.curToken.Type == TokenIdentifier {
|
||||
// Context engine style: chat <message>
|
||||
message = p.curToken.Value
|
||||
p.nextToken()
|
||||
} else {
|
||||
return nil, fmt.Errorf("expected model name (quoted string) or message")
|
||||
}
|
||||
|
||||
cmd := NewCommand("chat_to_model")
|
||||
|
||||
var err error
|
||||
var compositeModelName string = ""
|
||||
var messages []string
|
||||
var images []string
|
||||
var videos []string
|
||||
var audios []string
|
||||
var files []string
|
||||
effort := "default"
|
||||
verbosity := "low"
|
||||
if p.curToken.Type == TokenWith {
|
||||
p.nextToken() // pass WITH
|
||||
|
||||
optionsLoop:
|
||||
for {
|
||||
switch p.curToken.Type {
|
||||
case TokenWith:
|
||||
p.nextToken()
|
||||
// 'model@instance@provider'
|
||||
if compositeModelName != "" {
|
||||
return nil, fmt.Errorf("model name is already set")
|
||||
}
|
||||
compositeModelName, err = p.parseQuotedString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.nextToken()
|
||||
case TokenMessage:
|
||||
p.nextToken()
|
||||
if len(messages) != 0 {
|
||||
return nil, fmt.Errorf("message is already set")
|
||||
}
|
||||
messageLoop:
|
||||
for {
|
||||
if p.curToken.Type != TokenQuotedString {
|
||||
break messageLoop
|
||||
}
|
||||
var message string
|
||||
message, err = p.parseQuotedString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
message = strings.TrimSpace(message)
|
||||
messages = append(messages, message)
|
||||
p.nextToken()
|
||||
}
|
||||
case TokenImage:
|
||||
p.nextToken()
|
||||
if len(images) != 0 {
|
||||
return nil, fmt.Errorf("image is already set")
|
||||
}
|
||||
imageLoop:
|
||||
for {
|
||||
if p.curToken.Type != TokenQuotedString {
|
||||
break imageLoop
|
||||
}
|
||||
var image string
|
||||
image, err = p.parseQuotedString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
images = append(images, image)
|
||||
p.nextToken()
|
||||
}
|
||||
case TokenVideo:
|
||||
p.nextToken()
|
||||
if len(videos) != 0 {
|
||||
return nil, fmt.Errorf("video is already set")
|
||||
}
|
||||
videoLoop:
|
||||
for {
|
||||
if p.curToken.Type != TokenQuotedString {
|
||||
break videoLoop
|
||||
}
|
||||
var video string
|
||||
video, err = p.parseQuotedString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
videos = append(videos, video)
|
||||
p.nextToken()
|
||||
}
|
||||
case TokenAudio:
|
||||
p.nextToken()
|
||||
if len(audios) != 0 {
|
||||
return nil, fmt.Errorf("video is already set")
|
||||
}
|
||||
audioLoop:
|
||||
for {
|
||||
if p.curToken.Type != TokenQuotedString {
|
||||
break audioLoop
|
||||
}
|
||||
var audio string
|
||||
audio, err = p.parseQuotedString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
audios = append(audios, audio)
|
||||
p.nextToken()
|
||||
}
|
||||
case TokenFile:
|
||||
p.nextToken()
|
||||
if len(files) != 0 {
|
||||
return nil, fmt.Errorf("video is already set")
|
||||
}
|
||||
fileLoop:
|
||||
for {
|
||||
if p.curToken.Type != TokenQuotedString {
|
||||
break fileLoop
|
||||
}
|
||||
var file string
|
||||
file, err = p.parseQuotedString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
files = append(files, file)
|
||||
p.nextToken()
|
||||
}
|
||||
case TokenEffort:
|
||||
{
|
||||
p.nextToken() // pass Effort
|
||||
switch p.curToken.Type {
|
||||
case TokenNone:
|
||||
effort = "none"
|
||||
case TokenMinimal:
|
||||
effort = "minimal"
|
||||
case TokenLow:
|
||||
effort = "low"
|
||||
case TokenMedium:
|
||||
effort = "medium"
|
||||
case TokenHigh:
|
||||
effort = "high"
|
||||
case TokenMax:
|
||||
effort = "max"
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid effort level")
|
||||
}
|
||||
p.nextToken()
|
||||
break
|
||||
p.nextToken() // pass Effort
|
||||
switch p.curToken.Type {
|
||||
case TokenNone:
|
||||
effort = "none"
|
||||
case TokenMinimal:
|
||||
effort = "minimal"
|
||||
case TokenLow:
|
||||
effort = "low"
|
||||
case TokenMedium:
|
||||
effort = "medium"
|
||||
case TokenHigh:
|
||||
effort = "high"
|
||||
case TokenMax:
|
||||
effort = "max"
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid effort level")
|
||||
}
|
||||
p.nextToken()
|
||||
break optionsLoop
|
||||
case TokenVerbosity:
|
||||
{
|
||||
p.nextToken() // pass VERBOSITY
|
||||
switch p.curToken.Type {
|
||||
case TokenLow:
|
||||
verbosity = "low"
|
||||
case TokenMedium:
|
||||
verbosity = "median"
|
||||
case TokenHigh:
|
||||
verbosity = "high"
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid verbosity level")
|
||||
}
|
||||
p.nextToken()
|
||||
break
|
||||
p.nextToken() // pass VERBOSITY
|
||||
switch p.curToken.Type {
|
||||
case TokenLow:
|
||||
verbosity = "low"
|
||||
case TokenMedium:
|
||||
verbosity = "median"
|
||||
case TokenHigh:
|
||||
verbosity = "high"
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid verbosity level")
|
||||
}
|
||||
p.nextToken()
|
||||
break optionsLoop
|
||||
case TokenSemicolon:
|
||||
p.nextToken()
|
||||
break optionsLoop // done
|
||||
default:
|
||||
return nil, fmt.Errorf("expected VERBOSITY or EFFORT")
|
||||
// No more options to process
|
||||
break optionsLoop
|
||||
}
|
||||
}
|
||||
cmd := NewCommand("chat_to_model")
|
||||
|
||||
// Semicolon is optional
|
||||
if p.curToken.Type == TokenSemicolon {
|
||||
p.nextToken()
|
||||
}
|
||||
|
||||
if compositeModelName != "" {
|
||||
cmd.Params["composite_model_name"] = compositeModelName
|
||||
}
|
||||
cmd.Params["message"] = message
|
||||
cmd.Params["composite_model_name"] = compositeModelName
|
||||
cmd.Params["messages"] = messages
|
||||
cmd.Params["images"] = images
|
||||
cmd.Params["videos"] = videos
|
||||
cmd.Params["audios"] = audios
|
||||
cmd.Params["files"] = files
|
||||
cmd.Params["thinking"] = false
|
||||
cmd.Params["stream"] = false
|
||||
cmd.Params["effort"] = effort
|
||||
|
||||
Reference in New Issue
Block a user