Support stream for multimodal chat (#14537)

### What problem does this PR solve?

Support stream for multimodal chat

### Type of change

- [x] Refactoring
This commit is contained in:
qinling0210
2026-04-30 19:33:57 +08:00
committed by GitHub
parent 5fd4579a2f
commit 12af73f2ca
15 changed files with 369 additions and 227 deletions

View File

@@ -60,7 +60,7 @@ func (z *AliyunModel) Name() string {
return "siliconflow"
}
func (z *AliyunModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *AliyunModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if len(messages) == 0 {
return nil, fmt.Errorf("messages is empty")
}
@@ -195,8 +195,12 @@ func (z *AliyunModel) ChatWithMessages(modelName string, apiConfig *APIConfig, m
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *AliyunModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *AliyunModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
@@ -204,13 +208,20 @@ func (z *AliyunModel) ChatStreamlyWithSender(modelName, message *string, apiConf
url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat)
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{
{"role": "user", "content": *message},
},
"stream": false,
"model": modelName,
"messages": apiMessages,
"stream": true,
"temperature": 1,
}

View File

@@ -60,7 +60,7 @@ func (z *DeepSeekModel) Name() string {
return "deepseek"
}
func (z *DeepSeekModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *DeepSeekModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if len(messages) == 0 {
return nil, fmt.Errorf("messages is empty")
}
@@ -227,8 +227,12 @@ func (z *DeepSeekModel) ChatWithMessages(modelName string, apiConfig *APIConfig,
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *DeepSeekModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *DeepSeekModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
@@ -236,77 +240,83 @@ func (z *DeepSeekModel) ChatStreamlyWithSender(modelName, message *string, apiCo
url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region])
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{
{"role": "user", "content": *message},
},
"stream": false,
"model": modelName,
"messages": apiMessages,
"stream": true,
"temperature": 1,
}
if chatModelConfig.Stream != nil {
reqBody["stream"] = *chatModelConfig.Stream
}
if chatModelConfig != nil {
if chatModelConfig.Stream != nil {
reqBody["stream"] = *chatModelConfig.Stream
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.DoSample != nil {
reqBody["do_sample"] = *chatModelConfig.DoSample
}
if chatModelConfig.DoSample != nil {
reqBody["do_sample"] = *chatModelConfig.DoSample
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
if chatModelConfig.Thinking != nil {
if *chatModelConfig.Thinking {
var thinkingFlag string
switch *chatModelConfig.Effort {
case "none":
thinkingFlag = "disabled"
chatModelConfig.Thinking = nil
break
case "low":
thinkingFlag = "disabled"
chatModelConfig.Thinking = nil
break
case "medium":
thinkingFlag = "disabled"
chatModelConfig.Thinking = nil
break
case "high":
thinkingFlag = "enabled"
reqBody["reasoning_effort"] = "high"
break
case "default":
thinkingFlag = "enabled"
reqBody["reasoning_effort"] = "high"
break
case "max":
thinkingFlag = "enabled"
reqBody["reasoning_effort"] = "max"
break
default:
return fmt.Errorf("invalid effort level")
}
reqBody["thinking"] = map[string]interface{}{
"type": thinkingFlag,
}
} else {
reqBody["thinking"] = map[string]interface{}{
"type": "disabled",
if chatModelConfig.Thinking != nil {
if *chatModelConfig.Thinking {
var thinkingFlag string
switch *chatModelConfig.Effort {
case "none":
thinkingFlag = "disabled"
break
case "low":
thinkingFlag = "disabled"
break
case "medium":
thinkingFlag = "disabled"
break
case "high":
thinkingFlag = "enabled"
reqBody["reasoning_effort"] = "high"
break
case "default":
thinkingFlag = "enabled"
reqBody["reasoning_effort"] = "high"
break
case "max":
thinkingFlag = "enabled"
reqBody["reasoning_effort"] = "max"
break
default:
return fmt.Errorf("invalid effort level")
}
reqBody["thinking"] = map[string]interface{}{
"type": thinkingFlag,
}
} else {
reqBody["thinking"] = map[string]interface{}{
"type": "disabled",
}
}
}
}

View File

@@ -43,12 +43,12 @@ func (z *DummyModel) Name() string {
}
// ChatWithMessages sends multiple messages with roles and returns response
func (z *DummyModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *DummyModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
return nil, fmt.Errorf("not implemented")
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *DummyModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *DummyModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
return fmt.Errorf("not implemented")
}

View File

@@ -61,7 +61,7 @@ func (z *GiteeModel) Name() string {
}
// ChatWithMessages sends multiple messages with roles and returns response
func (z *GiteeModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *GiteeModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return nil, fmt.Errorf("api key is nil or empty")
}
@@ -211,8 +211,12 @@ func (z *GiteeModel) ChatWithMessages(modelName string, apiConfig *APIConfig, me
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *GiteeModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *GiteeModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
@@ -220,13 +224,20 @@ func (z *GiteeModel) ChatStreamlyWithSender(modelName, message *string, apiConfi
url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region])
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{
{"role": "user", "content": *message},
},
"stream": false,
"model": modelName,
"messages": apiMessages,
"stream": true,
"temperature": 1,
}

View File

@@ -46,7 +46,7 @@ func (z *GoogleModel) Name() string {
return "google"
}
func (z *GoogleModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *GoogleModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return nil, fmt.Errorf("api key is nil or empty")
}
@@ -119,8 +119,12 @@ func (z *GoogleModel) ChatWithMessages(modelName string, apiConfig *APIConfig, m
return &ChatResponse{Answer: &answer}, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *GoogleModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *GoogleModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
APIKey: *apiConfig.ApiKey,
@@ -129,12 +133,53 @@ func (z *GoogleModel) ChatStreamlyWithSender(modelName, message *string, apiConf
if err != nil {
return err
}
contents := []*genai.Content{
genai.NewContentFromText(*message, genai.RoleUser),
// Convert messages to Google SDK format
var contents []*genai.Content
for _, msg := range messages {
var role genai.Role
switch msg.Role {
case "user":
role = genai.RoleUser
case "model", "assistant":
role = genai.RoleModel
default:
role = genai.RoleUser
}
// Handle content based on type
switch c := msg.Content.(type) {
case string:
contents = append(contents, genai.NewContentFromText(c, role))
case []interface{}:
// Multimodal content - group parts within a single content
var parts []*genai.Part
for _, item := range c {
if itemMap, ok := item.(map[string]interface{}); ok {
contentType, _ := itemMap["type"].(string)
switch contentType {
case "text":
if text, ok := itemMap["text"].(string); ok {
parts = append(parts, genai.NewPartFromText(text))
}
case "image_url":
if imgMap, ok := itemMap["image_url"].(map[string]interface{}); ok {
if url, ok := imgMap["url"].(string); ok {
parts = append(parts, genai.NewPartFromURI(url, "image/jpeg"))
}
}
}
}
}
if len(parts) > 0 {
contents = append(contents, genai.NewContentFromParts(parts, role))
}
}
}
for response, err := range client.Models.GenerateContentStream(
ctx,
*modelName,
modelName,
contents,
nil,
) {
@@ -145,7 +190,7 @@ func (z *GoogleModel) ChatStreamlyWithSender(modelName, message *string, apiConf
content := response.Text()
var responseContent string
if chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
if chatModelConfig != nil && chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
responseContent = response.Candidates[0].Content.Parts[0].Text
}
@@ -157,7 +202,7 @@ func (z *GoogleModel) ChatStreamlyWithSender(modelName, message *string, apiConf
}
if content != "" {
logger.Info(fmt.Sprintf("Answer: %s", responseContent))
logger.Info(fmt.Sprintf("Answer: %s", content))
if err = sender(&content, nil); err != nil {
return err
}

View File

@@ -179,7 +179,7 @@ func (z *MinimaxModel) Chat(modelName, message *string, apiConfig *APIConfig, mo
}
// ChatWithMessages sends multiple messages with roles and returns response
func (z *MinimaxModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *MinimaxModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return nil, fmt.Errorf("api key is nil or empty")
}
@@ -310,8 +310,12 @@ func (z *MinimaxModel) ChatWithMessages(modelName string, apiConfig *APIConfig,
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *MinimaxModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *MinimaxModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
@@ -320,12 +324,19 @@ func (z *MinimaxModel) ChatStreamlyWithSender(modelName, message *string, apiCon
url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat)
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]interface{}{
{"role": "user", "content": *message},
},
"model": modelName,
"messages": apiMessages,
"stream": true,
"temperature": 1,
}

View File

@@ -60,7 +60,7 @@ func (z *MoonshotModel) Name() string {
return "moonshot"
}
func (k *MoonshotModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (k *MoonshotModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if len(messages) == 0 {
return nil, fmt.Errorf("messages is empty")
}
@@ -199,8 +199,12 @@ func (k *MoonshotModel) ChatWithMessages(modelName string, apiConfig *APIConfig,
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (k *MoonshotModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (k *MoonshotModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
@@ -208,13 +212,20 @@ func (k *MoonshotModel) ChatStreamlyWithSender(modelName, message *string, apiCo
url := fmt.Sprintf("%s/chat/completions", k.BaseURL[region])
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{
{"role": "user", "content": *message},
},
"stream": true,
"model": modelName,
"messages": apiMessages,
"stream": true,
}
if chatModelConfig.Stream != nil {

View File

@@ -80,7 +80,7 @@ type SiliconflowRerankResponse struct {
}
// ChatWithMessages sends multiple messages with roles and returns response
func (z *SiliconflowModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *SiliconflowModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return nil, fmt.Errorf("api key is nil or empty")
}
@@ -214,8 +214,12 @@ func (z *SiliconflowModel) ChatWithMessages(modelName string, apiConfig *APIConf
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *SiliconflowModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *SiliconflowModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
@@ -223,48 +227,57 @@ func (z *SiliconflowModel) ChatStreamlyWithSender(modelName, message *string, ap
url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region])
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{
{"role": "user", "content": *message},
},
"stream": false,
"model": modelName,
"messages": apiMessages,
"stream": true,
"temperature": 1,
}
if chatModelConfig.Stream != nil {
reqBody["stream"] = *chatModelConfig.Stream
}
if chatModelConfig != nil {
if chatModelConfig.Stream != nil {
reqBody["stream"] = *chatModelConfig.Stream
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.DoSample != nil {
reqBody["do_sample"] = *chatModelConfig.DoSample
}
if chatModelConfig.DoSample != nil {
reqBody["do_sample"] = *chatModelConfig.DoSample
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
if chatModelConfig.Thinking != nil {
if *chatModelConfig.Thinking {
reqBody["thinking"] = map[string]interface{}{
"type": "enabled",
}
} else {
reqBody["thinking"] = map[string]interface{}{
"type": "disabled",
if chatModelConfig.Thinking != nil {
if *chatModelConfig.Thinking {
reqBody["thinking"] = map[string]interface{}{
"type": "enabled",
}
} else {
reqBody["thinking"] = map[string]interface{}{
"type": "disabled",
}
}
}
}

View File

@@ -18,9 +18,10 @@ type ModelDriver interface {
Name() string
// ChatWithMessages sends multiple messages with role and content
ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error)
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error
ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error)
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
// messages accepts []Message which supports multimodal content (e.g., [{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {"url": "..."}}])
ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error
// Encode encodes a list of texts into embeddings
Encode(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error)
// Rerank calculates similarity scores between query and texts

View File

@@ -199,7 +199,7 @@ func (z *VllmModel) Chat(modelName, message *string, apiConfig *APIConfig, chatM
}
// ChatWithMessages sends multiple messages with roles and returns response
func (z *VllmModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *VllmModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if len(messages) == 0 {
return nil, fmt.Errorf("messages is empty")
}
@@ -332,27 +332,37 @@ func (z *VllmModel) ChatWithMessages(modelName string, apiConfig *APIConfig, mes
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *VllmModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *VllmModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
}
url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat)
// TODO figure out what's the point of these codes
modelType := strings.Split(*modelName, "-")[0]
modelType := strings.Split(modelName, "-")[0]
if modelType == "qwen" || modelType == "glm" {
url = fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.AsyncChat)
}
// Convert messages to API format (supporting multimodal content)
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{
{"role": "user", "content": *message},
},
"stream": true,
"model": modelName,
"messages": apiMessages,
"stream": true,
}
if modelConfig.Stream != nil {

View File

@@ -211,7 +211,7 @@ func (z *VolcEngine) Chat(modelName, message *string, apiConfig *APIConfig, mode
}
// ChatWithMessages sends multiple messages with roles and returns response
func (z *VolcEngine) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *VolcEngine) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if len(messages) == 0 {
return nil, fmt.Errorf("messages is empty")
}
@@ -370,8 +370,12 @@ func (z *VolcEngine) ChatWithMessages(modelName string, apiConfig *APIConfig, me
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *VolcEngine) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *VolcEngine) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
@@ -380,12 +384,19 @@ func (z *VolcEngine) ChatStreamlyWithSender(modelName, message *string, apiConfi
url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region])
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]interface{}{
{"role": "user", "content": *message},
},
"model": modelName,
"messages": apiMessages,
"stream": true,
"temperature": 1,
}

View File

@@ -61,7 +61,7 @@ func (z *ZhipuAIModel) Name() string {
}
// ChatWithMessages sends multiple messages with roles and returns response
func (z *ZhipuAIModel) ChatWithMessages(modelName string, apiConfig *APIConfig, messages []Message, chatModelConfig *ChatConfig) (*ChatResponse, error) {
func (z *ZhipuAIModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return nil, fmt.Errorf("api key is nil or empty")
}
@@ -201,8 +201,12 @@ func (z *ZhipuAIModel) ChatWithMessages(modelName string, apiConfig *APIConfig,
return chatResponse, nil
}
// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
func (z *ZhipuAIModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
// ChatStreamlyWithSender sends messages and streams response via sender function (best performance, no channel)
func (z *ZhipuAIModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
@@ -210,48 +214,57 @@ func (z *ZhipuAIModel) ChatStreamlyWithSender(modelName, message *string, apiCon
url := fmt.Sprintf("%s/%s", strings.TrimSuffix(z.BaseURL[region], "/"), z.URLSuffix.Chat)
// Convert messages to API format
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming enabled
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{
{"role": "user", "content": *message},
},
"stream": false,
"model": modelName,
"messages": apiMessages,
"stream": true,
"temperature": 1,
}
if chatModelConfig.Stream != nil {
reqBody["stream"] = *chatModelConfig.Stream
}
if chatModelConfig != nil {
if chatModelConfig.Stream != nil {
reqBody["stream"] = *chatModelConfig.Stream
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.DoSample != nil {
reqBody["do_sample"] = *chatModelConfig.DoSample
}
if chatModelConfig.DoSample != nil {
reqBody["do_sample"] = *chatModelConfig.DoSample
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
if chatModelConfig.Thinking != nil {
if *chatModelConfig.Thinking {
reqBody["thinking"] = map[string]interface{}{
"type": "enabled",
}
} else {
reqBody["thinking"] = map[string]interface{}{
"type": "disabled",
if chatModelConfig.Thinking != nil {
if *chatModelConfig.Thinking {
reqBody["thinking"] = map[string]interface{}{
"type": "enabled",
}
} else {
reqBody["thinking"] = map[string]interface{}{
"type": "disabled",
}
}
}
}

View File

@@ -828,24 +828,6 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) {
// Check if it's a stream request
if req.Stream {
// Streaming with multimodal messages not yet supported
hasMultimodal := false
for _, msg := range req.Messages {
if content, ok := msg["content"]; ok {
if _, isArray := content.([]interface{}); isArray {
hasMultimodal = true
break
}
}
}
if hasMultimodal {
c.JSON(http.StatusBadRequest, gin.H{
"code": 400,
"message": "Streaming with multimodal messages not yet supported",
})
return
}
// Set SSE headers
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
@@ -876,8 +858,16 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) {
return nil
}
// Convert []map[string]interface{} to []models.Message
messages := make([]models.Message, len(req.Messages))
for i, msg := range req.Messages {
role, _ := msg["role"].(string)
content := msg["content"]
messages[i] = models.Message{Role: role, Content: content}
}
// Stream response using sender function (best performance, no channel)
errorCode, err := h.modelProviderService.ChatToModelStreamWithSender(*req.ProviderName, *req.InstanceName, *req.ModelName, userID, req.Messages[0]["content"].(string), &apiConfig, &chatConfig, sender)
errorCode, err := h.modelProviderService.ChatToModelStreamWithSender(*req.ProviderName, *req.InstanceName, *req.ModelName, userID, messages, &apiConfig, &chatConfig, sender)
if errorCode != common.CodeSuccess {
c.SSEvent("error", err.Error())

View File

@@ -562,7 +562,7 @@ func (s *ChatSessionService) asyncChatSolo(dialog *entity.Chat, session *entity.
chatConfig := s.buildChatConfig(dialog, config)
// Perform chat
response, err := chatModel.ModelDriver.ChatWithMessages(*chatModel.ModelName, chatModel.APIConfig, msgs, chatConfig)
response, err := chatModel.ModelDriver.ChatWithMessages(*chatModel.ModelName, msgs, chatModel.APIConfig, chatConfig)
if err != nil {
logger.Error("asyncChatSolo chat failed", err)
return nil, err
@@ -604,26 +604,31 @@ func (s *ChatSessionService) asyncChatSoloStream(dialog *entity.Chat, session *e
return
}
// Convert messages to single string for ChatStreamlyWithSender
var msgBuilder strings.Builder
// Convert messages to []modelModule.Message for ChatStreamlyWithSender
var chatMessages []modelModule.Message
if systemPrompt != "" {
msgBuilder.WriteString("System: " + systemPrompt + "\n")
chatMessages = append(chatMessages, modelModule.Message{
Role: "system",
Content: systemPrompt,
})
}
for _, msg := range processedMessages {
role, _ := msg["role"].(string)
content, _ := msg["content"].(string)
if role != "" && content != "" && role != "system" {
msgBuilder.WriteString(role + ": " + content + "\n")
content := msg["content"]
if role != "" && content != nil && role != "system" {
chatMessages = append(chatMessages, modelModule.Message{
Role: role,
Content: content,
})
}
}
messageStr := msgBuilder.String()
// Get ChatConfig directly from dialog and config
chatConfig := s.buildChatConfig(dialog, config)
// Perform streaming chat using ChatStreamlyWithSender
fullAnswer := ""
err = chatModel.ModelDriver.ChatStreamlyWithSender(chatModel.ModelName, &messageStr, chatModel.APIConfig, chatConfig, func(answer *string, reason *string) error {
err = chatModel.ModelDriver.ChatStreamlyWithSender(*chatModel.ModelName, chatMessages, chatModel.APIConfig, chatConfig, func(answer *string, reason *string) error {
if reason != nil && *reason != "" {
fullAnswer += *reason
ans := s.structureAnswer(session, fullAnswer, messageID, session.ID, reference)

View File

@@ -740,7 +740,7 @@ func (m *ModelProviderService) ChatToModelWithMessages(providerName, instanceNam
apiConfig.ApiKey = &instance.APIKey
var response *modelModule.ChatResponse
response, err = providerInfo.ModelDriver.ChatWithMessages(modelName, apiConfig, messages, modelConfig)
response, err = providerInfo.ModelDriver.ChatWithMessages(modelName, messages, apiConfig, modelConfig)
if err != nil {
return nil, common.CodeServerError, err
}
@@ -776,7 +776,7 @@ func (m *ModelProviderService) ChatToModelWithMessages(providerName, instanceNam
newProviderInfo := providerInfo.ModelDriver.NewInstance(newURL)
var response *modelModule.ChatResponse
response, err = newProviderInfo.ChatWithMessages(modelName, apiConfig, messages, modelConfig)
response, err = newProviderInfo.ChatWithMessages(modelName, messages, apiConfig, modelConfig)
if err != nil {
return nil, common.CodeServerError, err
}
@@ -803,7 +803,7 @@ func (m *ModelProviderService) ChatWithMessagesToModelByApiKey(providerName, mod
}
var response *modelModule.ChatResponse
response, err = providerInfo.ModelDriver.ChatWithMessages(modelName, &modelModule.APIConfig{ApiKey: &apiKey}, messages, nil)
response, err = providerInfo.ModelDriver.ChatWithMessages(modelName, messages, &modelModule.APIConfig{ApiKey: &apiKey}, nil)
if err != nil {
return nil, common.CodeServerError, err
}
@@ -815,7 +815,7 @@ func (m *ModelProviderService) ChatWithMessagesToModelByApiKey(providerName, mod
}
// ChatToModelStreamWithSender streams chat response directly via sender function (best performance, no channel)
func (m *ModelProviderService) ChatToModelStreamWithSender(providerName, instanceName, modelName, userID, message string, apiConfig *modelModule.APIConfig, modelConfig *modelModule.ChatConfig, sender func(*string, *string) error) (common.ErrorCode, error) {
func (m *ModelProviderService) ChatToModelStreamWithSender(providerName, instanceName, modelName, userID string, messages []modelModule.Message, apiConfig *modelModule.APIConfig, modelConfig *modelModule.ChatConfig, sender func(*string, *string) error) (common.ErrorCode, error) {
// Get tenant ID from user
tenants, err := m.userTenantDAO.GetByUserIDAndRole(userID, "owner")
if err != nil {
@@ -861,7 +861,7 @@ func (m *ModelProviderService) ChatToModelStreamWithSender(providerName, instanc
apiConfig.Region = &region
apiConfig.ApiKey = &instance.APIKey
err = providerInfo.ModelDriver.ChatStreamlyWithSender(&modelName, &message, apiConfig, modelConfig, sender)
err = providerInfo.ModelDriver.ChatStreamlyWithSender(modelName, messages, apiConfig, modelConfig, sender)
if err != nil {
return common.CodeServerError, err
}
@@ -893,7 +893,7 @@ func (m *ModelProviderService) ChatToModelStreamWithSender(providerName, instanc
}
newProviderInfo := providerInfo.ModelDriver.NewInstance(newURL)
err = newProviderInfo.ChatStreamlyWithSender(&modelName, &message, apiConfig, modelConfig, sender)
err = newProviderInfo.ChatStreamlyWithSender(modelName, messages, apiConfig, modelConfig, sender)
if err != nil {
return common.CodeServerError, err
}