diff --git a/conf/models/deepseek.json b/conf/models/deepseek.json index 61c6a0f9e6..73a780768c 100644 --- a/conf/models/deepseek.json +++ b/conf/models/deepseek.json @@ -7,17 +7,18 @@ "chat": "chat/completions", "models": "models" }, + "series": "deepseek", "models": [ { - "name": "deepseek-chat", - "max_tokens": 128000, + "name": "deepseek-v4-flash", + "max_tokens": 1048576, "model_types": [ "chat" ] }, { - "name": "deepseek-reasoner", - "max_tokens": 128000, + "name": "deepseek-v4-pro", + "max_tokens": 1048576, "model_types": [ "chat" ] @@ -27,7 +28,15 @@ "thinking": { "default_value": true, "supported_models": [ - "deepseek-chat" + "deepseek-v4-pro", + "deepseek-v4-flash" + ] + }, + "reasoning_effort": { + "default_value": "high", + "supported_modes": [ + "deepseek-v4-pro", + "deepseek-v4-flash" ] } } diff --git a/conf/models/gitee.json b/conf/models/gitee.json new file mode 100644 index 0000000000..bf3927b062 --- /dev/null +++ b/conf/models/gitee.json @@ -0,0 +1,45 @@ +{ + "name": "Gitee", + "url": { + "default": "https://api.moark.com/v1" + }, + "url_suffix": { + "chat": "chat/completions", + "models": "models", + "status": "", + "balance": "tokens/packages/balance", + "embedding": "embedding", + "rerank": "rerank" + }, + "models": [ + { + "name": "qwen3-8b", + "max_tokens": 32768, + "model_types": [ + "chat" + ] + }, + { + "name": "qwen3-0.6b", + "max_tokens": 32768, + "model_types": [ + "chat" + ] + }, + { + "name": "glm-4.7-flash", + "max_tokens": 204800, + "model_types": [ + "chat" + ] + } + ], + "features": { + "thinking": { + "default_value": true, + "supported_models": [ + "deepseek-chat" + ] + } + } +} \ No newline at end of file diff --git a/conf/models/minimax.json b/conf/models/minimax.json index b2bf985600..185753c1f1 100644 --- a/conf/models/minimax.json +++ b/conf/models/minimax.json @@ -9,6 +9,7 @@ "tts": "v1/t2a_v2", "files": "v1/files/list" }, + "series": "minimax", "models": [ { "name": "minimax-m2.7", diff --git a/conf/models/moonshot.json b/conf/models/moonshot.json index e54fdb33d3..91d5e0fa5e 100644 --- a/conf/models/moonshot.json +++ b/conf/models/moonshot.json @@ -8,10 +8,11 @@ "models": "models", "balance": "users/me/balance" }, + "series": "kimi", "models": [ { "name": "kimi-k2.6", - "max_tokens": 256000, + "max_tokens": 262144, "model_types": [ "chat", "vision" @@ -19,7 +20,7 @@ }, { "name": "kimi-k2.5", - "max_tokens": 256000, + "max_tokens": 262144, "model_types": [ "chat", "vision" diff --git a/conf/models/openai.json b/conf/models/openai.json index d21d41650c..db78cdc81e 100644 --- a/conf/models/openai.json +++ b/conf/models/openai.json @@ -6,6 +6,7 @@ "url_suffix": { "chat": "chat/completions" }, + "series": "gpt", "models": [ { "name": "gpt-5.2-pro", @@ -102,30 +103,6 @@ "chat" ] }, - { - "name": "o3", - "max_tokens": 200000, - "model_types": [ - "chat", - "vision" - ] - }, - { - "name": "o4-mini", - "max_tokens": 200000, - "model_types": [ - "chat", - "vision" - ] - }, - { - "name": "o4-mini-high", - "max_tokens": 200000, - "model_types": [ - "chat", - "vision" - ] - }, { "name": "gpt-4o-mini", "max_tokens": 128000, diff --git a/conf/models/siliconflow.json b/conf/models/siliconflow.json index 80acb6c8ba..f1e704c990 100644 --- a/conf/models/siliconflow.json +++ b/conf/models/siliconflow.json @@ -1,26 +1,50 @@ { - "name": "SILICONFLOW", - "tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,IMAGE2TEXT", + "name": "SiliconFlow", "url": { "default": "https://api.siliconflow.cn/v1" }, "url_suffix": { "chat": "chat/completions", - "async_chat": "async/chat/completions", - "async_result": "async-result", - "embedding": "embedding", + "models": "models", + "embedding": "embeddings", "rerank": "rerank" }, "models": [ + { + "name": "qwen/qwen3-8b", + "max_tokens": 32768, + "model_types": [ + "chat" + ] + }, + { + "name": "qwen/qwen3.5-4b", + "max_tokens": 262144, + "model_types": [ + "chat" + ] + }, + { + "name": "tencent/hunyuan-mt-7b", + "max_tokens": 32768, + "model_types": [ + "chat" + ] + }, { "name": "BAAI/bge-reranker-v2-m3", "max_tokens": 8192, "model_types": [ "rerank" - ], - "features": {} + ] } - ] + ], + "features": { + "thinking": { + "default_value": true, + "supported_models": [ + "deepseek-chat" + ] + } + } } - - diff --git a/conf/models/xai.json b/conf/models/xai.json index 1de51cd6b2..4b36fb378f 100644 --- a/conf/models/xai.json +++ b/conf/models/xai.json @@ -6,6 +6,7 @@ "url_suffix": { "chat": "chat/completions" }, + "series": "grok", "models": [ { "name": "grok-4", diff --git a/conf/models/zhipu-ai.json b/conf/models/zhipu-ai.json index 3ed3b3cf74..0a4285af44 100644 --- a/conf/models/zhipu-ai.json +++ b/conf/models/zhipu-ai.json @@ -11,6 +11,7 @@ "rerank": "rerank", "files": "files" }, + "series": "glm", "models": [ { "name": "glm-5.1", diff --git a/internal/cli/http_client.go b/internal/cli/http_client.go index bb449ce437..cab9858407 100644 --- a/internal/cli/http_client.go +++ b/internal/cli/http_client.go @@ -337,7 +337,7 @@ func (c *HTTPClient) RequestJSON(method, path string, useAPIBase bool, authKind } // RequestStream makes an HTTP request for SSE streaming and returns the response body reader -func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (io.ReadCloser, float64, error) { +func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (io.ReadCloser, error) { url := c.BuildURL(path, useAPIBase) mergedHeaders := c.Headers(authKind, headers) @@ -345,7 +345,7 @@ func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKin if jsonBody != nil { jsonData, err := json.Marshal(jsonBody) if err != nil { - return nil, 0, err + return nil, err } body = bytes.NewReader(jsonData) if mergedHeaders == nil { @@ -361,24 +361,22 @@ func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKin req, err := http.NewRequest(method, url, body) if err != nil { - return nil, 0, err + return nil, err } for k, v := range mergedHeaders { req.Header.Set(k, v) } - startTime := time.Now() resp, err := c.client.Do(req) if err != nil { - return nil, 0, err + return nil, err } - duration := time.Since(startTime).Seconds() if resp.StatusCode != http.StatusOK { resp.Body.Close() - return nil, duration, fmt.Errorf("HTTP %d", resp.StatusCode) + return nil, fmt.Errorf("HTTP %d", resp.StatusCode) } - return resp.Body, duration, nil + return resp.Body, nil } diff --git a/internal/cli/lexer.go b/internal/cli/lexer.go index 8dc12bc3cf..4f5c4c1963 100644 --- a/internal/cli/lexer.go +++ b/internal/cli/lexer.go @@ -303,6 +303,22 @@ func (l *Lexer) lookupIdent(ident string) Token { return Token{Type: TokenChat, Value: ident} case "THINK": return Token{Type: TokenThink, Value: ident} + case "EFFORT": + return Token{Type: TokenEffort, Value: ident} + case "VERBOSITY": + return Token{Type: TokenVerbosity, Value: ident} + case "NONE": + return Token{Type: TokenNone, Value: ident} + case "MINIMAL": + return Token{Type: TokenMinimal, Value: ident} + case "LOW": + return Token{Type: TokenLow, Value: ident} + case "MEDIUM": + return Token{Type: TokenMedium, Value: ident} + case "HIGH": + return Token{Type: TokenHigh, Value: ident} + case "MAX": + return Token{Type: TokenMax, Value: ident} case "STREAM": return Token{Type: TokenStream, Value: ident} case "LS": diff --git a/internal/cli/response.go b/internal/cli/response.go index f611467ee3..90dd0dbba4 100644 --- a/internal/cli/response.go +++ b/internal/cli/response.go @@ -140,6 +140,7 @@ func (r *NonStreamResponse) PrintOut() { fmt.Printf("Thinking: %s\n", r.ReasoningContent) } fmt.Printf("Answer: %s\n", r.Answer) + fmt.Printf("Time: %f\n", r.Duration) } else { fmt.Println("ERROR") fmt.Printf("%d, %s\n", r.Code, r.Message) @@ -166,7 +167,9 @@ func (r *StreamMessageResponse) SetOutputFormat(format OutputFormat) { } func (r *StreamMessageResponse) PrintOut() { - if r.Code != 0 { + if r.Code == 0 { + fmt.Printf("Time: %f\n", r.Duration) + } else { fmt.Println("ERROR") fmt.Printf("%d, %s\n", r.Code, r.Message) } diff --git a/internal/cli/types.go b/internal/cli/types.go index 7969a26bf4..286f310c47 100644 --- a/internal/cli/types.go +++ b/internal/cli/types.go @@ -117,6 +117,14 @@ const ( TokenUse TokenCheck TokenThink + TokenEffort + TokenVerbosity + TokenNone + TokenMinimal + TokenLow + TokenMedium + TokenHigh + TokenMax TokenLS TokenCat TokenInsert diff --git a/internal/cli/user_command.go b/internal/cli/user_command.go index 1066af57cd..ac6d5b3bc8 100644 --- a/internal/cli/user_command.go +++ b/internal/cli/user_command.go @@ -24,6 +24,7 @@ import ( "os" ce "ragflow/internal/cli/contextengine" "strings" + "time" ) // PingServer pings the server to check if it's alive @@ -1460,13 +1461,13 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) { // Check if composite_model_name is provided in command if compositeModelName, ok := cmd.Params["composite_model_name"].(string); ok && compositeModelName != "" { - names := strings.Split(compositeModelName, "/") + names := strings.Split(compositeModelName, "@") if len(names) != 3 { - return nil, fmt.Errorf("model name must be in format 'provider/instance/model'") + return nil, fmt.Errorf("model name must be in format 'model@instance@provider'") } - providerName = names[0] + providerName = names[2] instanceName = names[1] - modelName = names[2] + modelName = names[0] } else if c.CurrentModel != nil { // Use current model if set providerName = c.CurrentModel.Provider @@ -1479,18 +1480,27 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) { message := cmd.Params["message"].(string) thinking := cmd.Params["thinking"].(bool) stream := cmd.Params["stream"].(bool) + effort := cmd.Params["effort"].(string) + verbosity := cmd.Params["verbosity"].(string) - url := fmt.Sprintf("/providers/%s/instances/%s/models/%s", providerName, instanceName, modelName) + url := fmt.Sprintf("/providers/%s/instances/%s/models", providerName, instanceName) payload := map[string]interface{}{ - "message": message, - "stream": stream, // use stream API - "thinking": thinking, + "model_name": modelName, + "message": message, + "stream": stream, // use stream API + "thinking": thinking, + } + + if thinking { + payload["effort"] = effort + payload["verbosity"] = verbosity } if stream { // Call stream http api - reader, duration, err := c.HTTPClient.RequestStream("POST", url, true, "web", nil, payload) + startTime := time.Now() + reader, err := c.HTTPClient.RequestStream("POST", url, true, "web", nil, payload) if err != nil { return nil, fmt.Errorf("failed to chat model: %w", err) } @@ -1513,6 +1523,7 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) { if reasoningPrint { fmt.Print("Thinking: ") reasoningPrint = false + thinking = true } else { fmt.Print(data) } @@ -1543,7 +1554,7 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) { return nil, fmt.Errorf("chat error: received error event from server") } } - + duration := time.Since(startTime).Seconds() if err := scanner.Err(); err != nil { return nil, fmt.Errorf("error reading stream: %w", err) } @@ -1633,15 +1644,15 @@ func (c *RAGFlowClient) UseModel(cmd *Command) (ResponseIf, error) { return nil, fmt.Errorf("model identifier not provided") } - names := strings.Split(compositeModelName, "/") + names := strings.Split(compositeModelName, "@") if len(names) != 3 { - return nil, fmt.Errorf("model identifier must be in format 'provider/instance/model'") + return nil, fmt.Errorf("model identifier must be in format 'model@instance@provider'") } c.CurrentModel = &CurrentModel{ - Provider: names[0], + Provider: names[2], Instance: names[1], - Model: names[2], + Model: names[0], } var result SimpleResponse diff --git a/internal/cli/user_parser.go b/internal/cli/user_parser.go index 951c389326..2db84b55cd 100644 --- a/internal/cli/user_parser.go +++ b/internal/cli/user_parser.go @@ -2241,12 +2241,12 @@ func (p *Parser) parseChatCommand() (*Command, error) { var message string // Check if we have a quoted string that looks like a model identifier (contains two slashes) - // Format: 'provider/instance/model' or just 'message' + // Format: 'model@instance@provider' or just 'message' if p.curToken.Type == TokenQuotedString { firstArg := p.curToken.Value // Check if it looks like a model identifier (contains exactly 2 slashes) - slashCount := strings.Count(firstArg, "/") + slashCount := strings.Count(firstArg, "@") if slashCount == 2 { // This is likely a model identifier, expect another quoted string for message compositeModelName = firstArg @@ -2271,18 +2271,69 @@ func (p *Parser) parseChatCommand() (*Command, error) { return nil, fmt.Errorf("expected model name (quoted string) or message") } + cmd := NewCommand("chat_to_model") + + effort := "default" + verbosity := "low" + if p.curToken.Type == TokenWith { + p.nextToken() // pass WITH + switch p.curToken.Type { + case TokenEffort: + { + p.nextToken() // pass VERBOSITY + switch p.curToken.Type { + case TokenNone: + effort = "none" + case TokenMinimal: + effort = "minimal" + case TokenLow: + effort = "low" + case TokenMedium: + effort = "medium" + case TokenHigh: + effort = "high" + case TokenMax: + effort = "max" + default: + return nil, fmt.Errorf("invalid effort level") + } + p.nextToken() + break + } + case TokenVerbosity: + { + p.nextToken() // pass VERBOSITY + switch p.curToken.Type { + case TokenLow: + verbosity = "low" + case TokenMedium: + verbosity = "median" + case TokenHigh: + verbosity = "high" + default: + return nil, fmt.Errorf("invalid verbosity level") + } + p.nextToken() + break + } + default: + return nil, fmt.Errorf("expected VERBOSITY or EFFORT") + } + } + // Semicolon is optional if p.curToken.Type == TokenSemicolon { p.nextToken() } - cmd := NewCommand("chat_to_model") if compositeModelName != "" { cmd.Params["composite_model_name"] = compositeModelName } cmd.Params["message"] = message cmd.Params["thinking"] = false cmd.Params["stream"] = false + cmd.Params["effort"] = effort + cmd.Params["verbosity"] = verbosity return cmd, nil } @@ -2369,10 +2420,10 @@ func (p *Parser) parseUseCommand() (*Command, error) { } p.nextToken() // consume MODEL - // Parse model identifier in format 'provider/instance/model' + // Parse model identifier in format 'model@instance@provider' compositeModelName, err := p.parseQuotedString() if err != nil { - return nil, fmt.Errorf("expected model identifier in format 'provider/instance/model': %w", err) + return nil, fmt.Errorf("expected model identifier in format 'model@instance@provider': %w", err) } p.nextToken() diff --git a/internal/entity/model.go b/internal/entity/model.go index e8307b7ae3..e1844d9b78 100644 --- a/internal/entity/model.go +++ b/internal/entity/model.go @@ -159,6 +159,7 @@ type Model struct { MaxTokens int `json:"max_tokens"` ModelTypes []string `json:"model_types"` Thinking *ModelThinking `json:"thinking"` + Series *string `json:"series"` ModelTypeMap map[string]bool } @@ -169,6 +170,7 @@ type Provider struct { URLSuffix models.URLSuffix `json:"url_suffix"` Models []*Model `json:"models"` Features Features `json:"features"` + Series string `json:"series"` ModelDriver models.ModelDriver } @@ -255,6 +257,14 @@ func NewProviderManager(dirPath string) (*ProviderManager, error) { } } + if provider.Series == "" { + pos := strings.Index(model.Name, "-") + modelSeries := model.Name[0:pos] + model.Series = &modelSeries + } else { + model.Series = &provider.Name + } + model.ModelTypeMap = make(map[string]bool) for _, modelType := range model.ModelTypes { model.ModelTypeMap[modelType] = true diff --git a/internal/entity/models/common.go b/internal/entity/models/common.go new file mode 100644 index 0000000000..dd8fd62da5 --- /dev/null +++ b/internal/entity/models/common.go @@ -0,0 +1,47 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package models + +import "strings" + +func GetThinkingAndAnswer(modelSeries *string, content *string) (*string, *string) { + switch *modelSeries { + case "qwen3": + return extractThinkContent(content) + } + return nil, content +} + +func extractThinkContent(content *string) (*string, *string) { + startTag := "" + endTag := "" + + startIdx := strings.Index(*content, startTag) + endIdx := strings.Index(*content, endTag) + + if startIdx == -1 || endIdx == -1 || endIdx <= startIdx { + return nil, content + } + + thinking := (*content)[startIdx+len(startTag) : endIdx] + answer := (*content)[endIdx+len(endTag):] + + thinking = strings.TrimLeft(thinking, "\n") + answer = strings.TrimLeft(answer, "\n") + + return &thinking, &answer +} diff --git a/internal/entity/models/deepseek.go b/internal/entity/models/deepseek.go index 5b7a43d905..9ca5f534f8 100644 --- a/internal/entity/models/deepseek.go +++ b/internal/entity/models/deepseek.go @@ -17,11 +17,14 @@ package models import ( + "bufio" "bytes" "encoding/json" "fmt" "io" "net/http" + "ragflow/internal/logger" + "strings" "time" ) @@ -55,7 +58,160 @@ func (z *DeepSeekModel) Name() string { // Chat sends a message and returns response func (z *DeepSeekModel) Chat(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) { - return nil, fmt.Errorf("%s, no such method", z.Name()) + if message == nil { + return nil, fmt.Errorf("message is nil") + } + + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat) + + // Build request body + reqBody := map[string]interface{}{ + "model": modelName, + "messages": []map[string]string{ + {"role": "user", "content": *message}, + }, + "stream": false, + "temperature": 1, + } + + if chatModelConfig.Stream != nil { + reqBody["stream"] = *chatModelConfig.Stream + } + + if chatModelConfig.MaxTokens != nil { + reqBody["max_tokens"] = *chatModelConfig.MaxTokens + } + + if chatModelConfig.Temperature != nil { + reqBody["temperature"] = *chatModelConfig.Temperature + } + + if chatModelConfig.TopP != nil { + reqBody["top_p"] = *chatModelConfig.TopP + } + + if chatModelConfig.Stop != nil { + reqBody["stop"] = *chatModelConfig.Stop + } + + if chatModelConfig.Thinking != nil { + if *chatModelConfig.Thinking { + var thinkingFlag string + switch *chatModelConfig.Effort { + case "none": + thinkingFlag = "disabled" + chatModelConfig.Thinking = nil + break + case "low": + thinkingFlag = "disabled" + chatModelConfig.Thinking = nil + break + case "medium": + thinkingFlag = "disabled" + chatModelConfig.Thinking = nil + break + case "high": + thinkingFlag = "enabled" + reqBody["reasoning_effort"] = "high" + break + case "default": + thinkingFlag = "enabled" + reqBody["reasoning_effort"] = "high" + break + case "max": + thinkingFlag = "enabled" + reqBody["reasoning_effort"] = "max" + break + default: + return nil, fmt.Errorf("invalid effort level") + } + reqBody["thinking"] = map[string]interface{}{ + "type": thinkingFlag, + } + } else { + reqBody["thinking"] = map[string]interface{}{ + "type": "disabled", + } + } + } + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Parse response + var result map[string]interface{} + if err = json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + choices, ok := result["choices"].([]interface{}) + if !ok || len(choices) == 0 { + return nil, fmt.Errorf("no choices in response") + } + + firstChoice, ok := choices[0].(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid choice format") + } + + messageMap, ok := firstChoice["message"].(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid message format") + } + + content, ok := messageMap["content"].(string) + if !ok { + return nil, fmt.Errorf("invalid content format") + } + + var reasonContent string + if chatModelConfig.Thinking != nil && *chatModelConfig.Thinking { + reasonContent, ok = messageMap["reasoning_content"].(string) + if !ok { + return nil, fmt.Errorf("invalid content format") + } + // if first char of reasonContent is \n remove the '\n' + if reasonContent != "" && reasonContent[0] == '\n' { + reasonContent = reasonContent[1:] + } + } + + chatResponse := &ChatResponse{ + Answer: &content, + ReasonContent: &reasonContent, + } + + return chatResponse, nil } // ChatWithMessages sends multiple messages with roles and returns response @@ -65,7 +221,179 @@ func (z *DeepSeekModel) ChatWithMessages(modelName string, apiKey *string, messa // ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel) func (z *DeepSeekModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error { - return nil + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region]) + + // Build request body with streaming enabled + reqBody := map[string]interface{}{ + "model": modelName, + "messages": []map[string]string{ + {"role": "user", "content": *message}, + }, + "stream": false, + "temperature": 1, + } + + if chatModelConfig.Stream != nil { + reqBody["stream"] = *chatModelConfig.Stream + } + + if chatModelConfig.MaxTokens != nil { + reqBody["max_tokens"] = *chatModelConfig.MaxTokens + } + + if chatModelConfig.Temperature != nil { + reqBody["temperature"] = *chatModelConfig.Temperature + } + + if chatModelConfig.DoSample != nil { + reqBody["do_sample"] = *chatModelConfig.DoSample + } + + if chatModelConfig.TopP != nil { + reqBody["top_p"] = *chatModelConfig.TopP + } + + if chatModelConfig.Stop != nil { + reqBody["stop"] = *chatModelConfig.Stop + } + + if chatModelConfig.Thinking != nil { + if *chatModelConfig.Thinking { + var thinkingFlag string + switch *chatModelConfig.Effort { + case "none": + thinkingFlag = "disabled" + chatModelConfig.Thinking = nil + break + case "low": + thinkingFlag = "disabled" + chatModelConfig.Thinking = nil + break + case "medium": + thinkingFlag = "disabled" + chatModelConfig.Thinking = nil + break + case "high": + thinkingFlag = "enabled" + reqBody["reasoning_effort"] = "high" + break + case "default": + thinkingFlag = "enabled" + reqBody["reasoning_effort"] = "high" + break + case "max": + thinkingFlag = "enabled" + reqBody["reasoning_effort"] = "max" + break + default: + return fmt.Errorf("invalid effort level") + } + reqBody["thinking"] = map[string]interface{}{ + "type": thinkingFlag, + } + } else { + reqBody["thinking"] = map[string]interface{}{ + "type": "disabled", + } + } + } + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // SSE parsing: read line by line + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + logger.Info(line) + + // SSE data line starts with "data:" + if !strings.HasPrefix(line, "data:") { + continue + } + + // Extract JSON after "data:" + data := strings.TrimSpace(line[5:]) + + // [DONE] marks the end of stream + if data == "[DONE]" { + break + } + + // Parse the JSON event + var event map[string]interface{} + if err = json.Unmarshal([]byte(data), &event); err != nil { + continue + } + + choices, ok := event["choices"].([]interface{}) + if !ok || len(choices) == 0 { + continue + } + + firstChoice, ok := choices[0].(map[string]interface{}) + if !ok { + continue + } + + delta, ok := firstChoice["delta"].(map[string]interface{}) + if !ok { + continue + } + + content, ok := delta["content"].(string) + if ok && content != "" { + if err := sender(&content, nil); err != nil { + return err + } + } + + reasoningContent, ok := delta["reasoning_content"].(string) + if ok && reasoningContent != "" { + if err := sender(nil, &reasoningContent); err != nil { + return err + } + } + + finishReason, ok := firstChoice["finish_reason"].(string) + if ok && finishReason != "" { + break + } + } + + // Send [DONE] marker for OpenAI compatibility + endOfStream := "[DONE]" + if err = sender(&endOfStream, nil); err != nil { + return err + } + + return scanner.Err() } // EncodeToEmbedding encodes a list of texts into embeddings @@ -73,15 +401,15 @@ func (z *DeepSeekModel) EncodeToEmbedding(modelName *string, texts []string, api return nil, fmt.Errorf("%s, no such method", z.Name()) } -type Model struct { +type DSModel struct { ID string `json:"id"` Object string `json:"object"` OwnedBy string `json:"owned_by"` } -type ModelList struct { - Object string `json:"object"` - Models []Model `json:"data"` +type DSModelList struct { + Object string `json:"object"` + Models []DSModel `json:"data"` } func (z *DeepSeekModel) ListModels(apiConfig *APIConfig) ([]string, error) { @@ -124,7 +452,7 @@ func (z *DeepSeekModel) ListModels(apiConfig *APIConfig) ([]string, error) { } // Parse response - var modelList ModelList + var modelList DSModelList if err = json.Unmarshal(body, &modelList); err != nil { return nil, fmt.Errorf("failed to parse response: %w", err) } diff --git a/internal/entity/models/factory.go b/internal/entity/models/factory.go index facfce3707..d03a020ff1 100644 --- a/internal/entity/models/factory.go +++ b/internal/entity/models/factory.go @@ -41,6 +41,10 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string return NewMoonshotModel(baseURL, urlSuffix), nil case "minimax": return NewMinimaxModel(baseURL, urlSuffix), nil + case "gitee": + return NewGiteeModel(baseURL, urlSuffix), nil + case "siliconflow": + return NewSiliconflowModel(baseURL, urlSuffix), nil default: return NewDummyModel(baseURL, urlSuffix), nil } diff --git a/internal/entity/models/gitee.go b/internal/entity/models/gitee.go new file mode 100644 index 0000000000..f1eb7058dd --- /dev/null +++ b/internal/entity/models/gitee.go @@ -0,0 +1,522 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package models + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "ragflow/internal/logger" + "strings" + "time" +) + +// GiteeModel implements ModelDriver for Gitee +type GiteeModel struct { + BaseURL map[string]string + URLSuffix URLSuffix + httpClient *http.Client // Reusable HTTP client with connection pool +} + +// NewGiteeModel creates a new Gitee model instance +func NewGiteeModel(baseURL map[string]string, urlSuffix URLSuffix) *GiteeModel { + return &GiteeModel{ + BaseURL: baseURL, + URLSuffix: urlSuffix, + httpClient: &http.Client{ + Timeout: 120 * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + DisableCompression: false, + }, + }, + } +} + +func (z *GiteeModel) Name() string { + return "gitee" +} + +// Chat sends a message and returns response +func (z *GiteeModel) Chat(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) { + if message == nil { + return nil, fmt.Errorf("message is nil") + } + + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat) + + // I need to get the model series, such as qwen3 is the prefix, the model series will be qwen. glm is the prefix, the model series will be glm. such as the model name: qwen3-0.6b, the model series will be qwen3 + // the model name is glm-4.7, the model series will be glm + modelSeries := strings.Split(*modelName, "-")[0] + if modelSeries == "qwen" || modelSeries == "glm" { + url = fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.AsyncChat) + } + + // Build request body + reqBody := map[string]interface{}{ + "model": modelName, + "messages": []map[string]string{ + {"role": "user", "content": *message}, + }, + "stream": false, + "temperature": 1, + } + + if chatModelConfig.Stream != nil { + reqBody["stream"] = *chatModelConfig.Stream + } + + if chatModelConfig.MaxTokens != nil { + reqBody["max_tokens"] = *chatModelConfig.MaxTokens + } + + if chatModelConfig.Temperature != nil { + reqBody["temperature"] = *chatModelConfig.Temperature + } + + if chatModelConfig.TopP != nil { + reqBody["top_p"] = *chatModelConfig.TopP + } + + if chatModelConfig.Stop != nil { + reqBody["stop"] = *chatModelConfig.Stop + } + + if chatModelConfig.Thinking != nil { + if *chatModelConfig.Thinking { + reqBody["thinking"] = map[string]interface{}{ + "type": "enabled", + } + } else { + reqBody["thinking"] = map[string]interface{}{ + "type": "disabled", + } + } + } + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Parse response + var result map[string]interface{} + if err = json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + choices, ok := result["choices"].([]interface{}) + if !ok || len(choices) == 0 { + return nil, fmt.Errorf("no choices in response") + } + + firstChoice, ok := choices[0].(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid choice format") + } + + messageMap, ok := firstChoice["message"].(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid message format") + } + + content, ok := messageMap["content"].(string) + if !ok { + return nil, fmt.Errorf("invalid content format") + } + + thinking, answer := GetThinkingAndAnswer(chatModelConfig.ModelSeries, &content) + + chatResponse := &ChatResponse{ + Answer: answer, + ReasonContent: thinking, + } + + return chatResponse, nil +} + +// ChatWithMessages sends multiple messages with roles and returns response +func (z *GiteeModel) ChatWithMessages(modelName string, apiKey *string, messages []Message, chatModelConfig *ChatConfig) (string, error) { + return "", fmt.Errorf("%s, ChatWithMessages not implemented", z.Name()) +} + +// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel) +func (z *GiteeModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error { + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region]) + + // Build request body with streaming enabled + reqBody := map[string]interface{}{ + "model": modelName, + "messages": []map[string]string{ + {"role": "user", "content": *message}, + }, + "stream": false, + "temperature": 1, + } + + if chatModelConfig.Stream != nil { + reqBody["stream"] = *chatModelConfig.Stream + } + + if chatModelConfig.MaxTokens != nil { + reqBody["max_tokens"] = *chatModelConfig.MaxTokens + } + + if chatModelConfig.Temperature != nil { + reqBody["temperature"] = *chatModelConfig.Temperature + } + + if chatModelConfig.DoSample != nil { + reqBody["do_sample"] = *chatModelConfig.DoSample + } + + if chatModelConfig.TopP != nil { + reqBody["top_p"] = *chatModelConfig.TopP + } + + if chatModelConfig.Stop != nil { + reqBody["stop"] = *chatModelConfig.Stop + } + + if chatModelConfig.Thinking != nil { + if *chatModelConfig.Thinking { + reqBody["thinking"] = map[string]interface{}{ + "type": "enabled", + } + } else { + reqBody["thinking"] = map[string]interface{}{ + "type": "disabled", + } + } + } + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + reserveText := "" + thinkingPhase := false + answerPhase := false + + // SSE parsing: read line by line + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + logger.Info(line) + + // SSE data line starts with "data:" + if !strings.HasPrefix(line, "data:") { + continue + } + + // Extract JSON after "data:" + data := strings.TrimSpace(line[5:]) + + // [DONE] marks the end of stream + if data == "[DONE]" { + break + } + + // Parse the JSON event + var event map[string]interface{} + if err = json.Unmarshal([]byte(data), &event); err != nil { + continue + } + + choices, ok := event["choices"].([]interface{}) + if !ok || len(choices) == 0 { + continue + } + + firstChoice, ok := choices[0].(map[string]interface{}) + if !ok { + continue + } + + delta, ok := firstChoice["delta"].(map[string]interface{}) + if !ok { + continue + } + + content, ok := delta["content"].(string) + if ok && content != "" { + if content == "" { + thinkingPhase = true + continue + + } else if content == "" { + thinkingPhase = false + answerPhase = true + continue + } + + if thinkingPhase { + if err = sender(nil, &content); err != nil { + return err + } + reserveText = "" + } else if answerPhase { + if err = sender(&content, nil); err != nil { + return err + } + reserveText = "" + } else { + content = strings.Trim(content, "\n") + content = strings.Trim(content, " ") + if content != "" { + reserveText += content + } + } + } + + finishReason, ok := firstChoice["finish_reason"].(string) + if ok && finishReason != "" { + break + } + } + + if reserveText != "" { + if err = sender(&reserveText, nil); err != nil { + return err + } + } + + // Send [DONE] marker for OpenAI compatibility + endOfStream := "[DONE]" + if err = sender(&endOfStream, nil); err != nil { + return err + } + + return scanner.Err() +} + +// EncodeToEmbedding encodes a list of texts into embeddings +func (z *GiteeModel) EncodeToEmbedding(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) { + return nil, fmt.Errorf("%s, no such method", z.Name()) +} + +func (z *GiteeModel) ListModels(apiConfig *APIConfig) ([]string, error) { + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Models) + + // Build request body + reqBody := map[string]interface{}{} + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Parse response + var modelList DSModelList + if err = json.Unmarshal(body, &modelList); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + var models []string + for _, model := range modelList.Models { + modelName := model.ID + if model.OwnedBy != "" { + modelName = model.ID + "@" + model.OwnedBy + } + models = append(models, modelName) + } + + return models, nil +} + +func (z *GiteeModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) { + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Balance) + + // Build request body + reqBody := map[string]interface{}{} + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Parse response + var result map[string]interface{} + if err = json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + balance := result["balance"].(float64) + + var response = map[string]interface{}{ + "balance": balance, + "currency": "CNY", + } + + return response, nil +} + +func (z *GiteeModel) CheckConnection(apiConfig *APIConfig) error { + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Status) + + // Build request body + reqBody := map[string]interface{}{} + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + return nil +} diff --git a/internal/entity/models/siliconflow.go b/internal/entity/models/siliconflow.go new file mode 100644 index 0000000000..f4a6c0ef78 --- /dev/null +++ b/internal/entity/models/siliconflow.go @@ -0,0 +1,437 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package models + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "ragflow/internal/logger" + "strings" + "time" +) + +// SiliconflowModel implements ModelDriver for Siliconflow +type SiliconflowModel struct { + BaseURL map[string]string + URLSuffix URLSuffix + httpClient *http.Client // Reusable HTTP client with connection pool +} + +// NewSiliconflowModel creates a new Siliconflow model instance +func NewSiliconflowModel(baseURL map[string]string, urlSuffix URLSuffix) *SiliconflowModel { + return &SiliconflowModel{ + BaseURL: baseURL, + URLSuffix: urlSuffix, + httpClient: &http.Client{ + Timeout: 120 * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + DisableCompression: false, + }, + }, + } +} + +func (z *SiliconflowModel) Name() string { + return "siliconflow" +} + +// Chat sends a message and returns response +func (z *SiliconflowModel) Chat(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) { + if message == nil { + return nil, fmt.Errorf("message is nil") + } + + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat) + + // I need to get the model series, such as qwen3 is the prefix, the model series will be qwen. glm is the prefix, the model series will be glm. such as the model name: qwen3-0.6b, the model series will be qwen3 + // the model name is glm-4.7, the model series will be glm + modelSeries := strings.Split(*modelName, "-")[0] + if modelSeries == "qwen" || modelSeries == "glm" { + url = fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.AsyncChat) + } + + // Build request body + reqBody := map[string]interface{}{ + "model": modelName, + "messages": []map[string]string{ + {"role": "user", "content": *message}, + }, + "stream": false, + "temperature": 1, + } + + if chatModelConfig.Stream != nil { + reqBody["stream"] = *chatModelConfig.Stream + } + + if chatModelConfig.MaxTokens != nil { + reqBody["max_tokens"] = *chatModelConfig.MaxTokens + } + + if chatModelConfig.Temperature != nil { + reqBody["temperature"] = *chatModelConfig.Temperature + } + + if chatModelConfig.TopP != nil { + reqBody["top_p"] = *chatModelConfig.TopP + } + + if chatModelConfig.Stop != nil { + reqBody["stop"] = *chatModelConfig.Stop + } + + if chatModelConfig.Thinking != nil { + if *chatModelConfig.Thinking { + reqBody["thinking"] = map[string]interface{}{ + "type": "enabled", + } + } else { + reqBody["thinking"] = map[string]interface{}{ + "type": "disabled", + } + } + } + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Parse response + var result map[string]interface{} + if err = json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + choices, ok := result["choices"].([]interface{}) + if !ok || len(choices) == 0 { + return nil, fmt.Errorf("no choices in response") + } + + firstChoice, ok := choices[0].(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid choice format") + } + + messageMap, ok := firstChoice["message"].(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid message format") + } + + content, ok := messageMap["content"].(string) + if !ok { + return nil, fmt.Errorf("invalid content format") + } + + thinking, answer := GetThinkingAndAnswer(chatModelConfig.ModelSeries, &content) + + chatResponse := &ChatResponse{ + Answer: answer, + ReasonContent: thinking, + } + + return chatResponse, nil +} + +// ChatWithMessages sends multiple messages with roles and returns response +func (z *SiliconflowModel) ChatWithMessages(modelName string, apiKey *string, messages []Message, chatModelConfig *ChatConfig) (string, error) { + return "", fmt.Errorf("%s, ChatWithMessages not implemented", z.Name()) +} + +// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel) +func (z *SiliconflowModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error { + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region]) + + // Build request body with streaming enabled + reqBody := map[string]interface{}{ + "model": modelName, + "messages": []map[string]string{ + {"role": "user", "content": *message}, + }, + "stream": false, + "temperature": 1, + } + + if chatModelConfig.Stream != nil { + reqBody["stream"] = *chatModelConfig.Stream + } + + if chatModelConfig.MaxTokens != nil { + reqBody["max_tokens"] = *chatModelConfig.MaxTokens + } + + if chatModelConfig.Temperature != nil { + reqBody["temperature"] = *chatModelConfig.Temperature + } + + if chatModelConfig.DoSample != nil { + reqBody["do_sample"] = *chatModelConfig.DoSample + } + + if chatModelConfig.TopP != nil { + reqBody["top_p"] = *chatModelConfig.TopP + } + + if chatModelConfig.Stop != nil { + reqBody["stop"] = *chatModelConfig.Stop + } + + if chatModelConfig.Thinking != nil { + if *chatModelConfig.Thinking { + reqBody["thinking"] = map[string]interface{}{ + "type": "enabled", + } + } else { + reqBody["thinking"] = map[string]interface{}{ + "type": "disabled", + } + } + } + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + reserveText := "" + thinkingPhase := false + answerPhase := false + + // SSE parsing: read line by line + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + logger.Info(line) + + // SSE data line starts with "data:" + if !strings.HasPrefix(line, "data:") { + continue + } + + // Extract JSON after "data:" + data := strings.TrimSpace(line[5:]) + + // [DONE] marks the end of stream + if data == "[DONE]" { + break + } + + // Parse the JSON event + var event map[string]interface{} + if err = json.Unmarshal([]byte(data), &event); err != nil { + continue + } + + choices, ok := event["choices"].([]interface{}) + if !ok || len(choices) == 0 { + continue + } + + firstChoice, ok := choices[0].(map[string]interface{}) + if !ok { + continue + } + + delta, ok := firstChoice["delta"].(map[string]interface{}) + if !ok { + continue + } + + content, ok := delta["content"].(string) + if ok && content != "" { + if content == "" { + thinkingPhase = true + continue + + } else if content == "" { + thinkingPhase = false + answerPhase = true + continue + } + + if thinkingPhase { + if err = sender(nil, &content); err != nil { + return err + } + reserveText = "" + } else if answerPhase { + if err = sender(&content, nil); err != nil { + return err + } + reserveText = "" + } else { + content = strings.Trim(content, "\n") + content = strings.Trim(content, " ") + if content != "" { + reserveText += content + } + } + } + + finishReason, ok := firstChoice["finish_reason"].(string) + if ok && finishReason != "" { + break + } + } + + if reserveText != "" { + if err = sender(&reserveText, nil); err != nil { + return err + } + } + + // Send [DONE] marker for OpenAI compatibility + endOfStream := "[DONE]" + if err = sender(&endOfStream, nil); err != nil { + return err + } + + return scanner.Err() +} + +// EncodeToEmbedding encodes a list of texts into embeddings +func (z *SiliconflowModel) EncodeToEmbedding(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) { + return nil, fmt.Errorf("%s, no such method", z.Name()) +} + +func (z *SiliconflowModel) ListModels(apiConfig *APIConfig) ([]string, error) { + var region = "default" + if apiConfig.Region != nil { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Models) + + // Build request body + reqBody := map[string]interface{}{} + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := z.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Parse response + var modelList DSModelList + if err = json.Unmarshal(body, &modelList); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + var models []string + for _, model := range modelList.Models { + modelName := model.ID + if model.OwnedBy != "" { + modelName = model.ID + "@" + model.OwnedBy + } + models = append(models, modelName) + } + + return models, nil +} + +func (z *SiliconflowModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) { + return nil, fmt.Errorf("%s, no such method", z.Name()) +} + +func (z *SiliconflowModel) CheckConnection(apiConfig *APIConfig) error { + _, err := z.ListModels(apiConfig) + if err != nil { + return err + } + return nil +} diff --git a/internal/entity/models/types.go b/internal/entity/models/types.go index 3a398f01f7..d9461aaf7d 100644 --- a/internal/entity/models/types.go +++ b/internal/entity/models/types.go @@ -41,6 +41,7 @@ type URLSuffix struct { Models string `json:"models"` Balance string `json:"balance"` Files string `json:"files"` + Status string `json:"status"` } type ChatConfig struct { @@ -51,6 +52,9 @@ type ChatConfig struct { TopP *float64 DoSample *bool Stop *[]string + ModelSeries *string + Effort *string + Verbosity *string } type APIConfig struct { diff --git a/internal/handler/providers.go b/internal/handler/providers.go index a3bdddb6c6..8fc7332135 100644 --- a/internal/handler/providers.go +++ b/internal/handler/providers.go @@ -643,9 +643,12 @@ func (h *ProviderHandler) EnableOrDisableModel(c *gin.Context) { } type ChatToModelRequest struct { - Message string `json:"message" binding:"required"` - Stream bool `json:"stream"` - Thinking bool `json:"thinking"` + ModelName string `json:"model_name" binding:"required"` + Message string `json:"message" binding:"required"` + Stream bool `json:"stream"` + Thinking bool `json:"thinking"` + Effort *string `json:"effort"` + Verbosity *string `json:"verbosity"` } func (h *ProviderHandler) ChatToModel(c *gin.Context) { @@ -667,15 +670,6 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) { return } - modelName := c.Param("model_name") - if modelName == "" { - c.JSON(http.StatusBadRequest, gin.H{ - "code": 400, - "message": "Model name is required", - }) - return - } - var req ChatToModelRequest if err := c.ShouldBindJSON(&req); err != nil { println("JSON bind error: %v (type: %T)", err, err) @@ -688,6 +682,28 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) { userID := c.GetString("user_id") + if !req.Thinking { + req.Effort = nil + req.Verbosity = nil + } + + apiConfig := models.APIConfig{ + ApiKey: nil, + Region: nil, + } + + chatConfig := models.ChatConfig{ + Thinking: &req.Thinking, + Stream: &req.Stream, + Stop: &[]string{}, + DoSample: nil, + MaxTokens: nil, + Temperature: nil, + TopP: nil, + Effort: req.Effort, + Verbosity: req.Verbosity, + } + // Check if it's a stream request if req.Stream { // Set SSE headers @@ -720,23 +736,8 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) { return nil } - apiConfig := models.APIConfig{ - ApiKey: nil, - Region: nil, - } - - chatConfig := models.ChatConfig{ - Thinking: &req.Thinking, - Stream: &req.Stream, - Stop: &[]string{}, - DoSample: nil, - MaxTokens: nil, - Temperature: nil, - TopP: nil, - } - // Stream response using sender function (best performance, no channel) - errorCode := h.modelProviderService.ChatToModelStreamWithSender(providerName, instanceName, modelName, userID, req.Message, &apiConfig, &chatConfig, sender) + errorCode := h.modelProviderService.ChatToModelStreamWithSender(providerName, instanceName, req.ModelName, userID, req.Message, &apiConfig, &chatConfig, sender) if errorCode != common.CodeSuccess { c.SSEvent("error", "stream failed") @@ -744,23 +745,8 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) { return } - apiConfig := models.APIConfig{ - ApiKey: nil, - Region: nil, - } - - chatConfig := models.ChatConfig{ - Thinking: &req.Thinking, - Stream: &req.Stream, - Stop: &[]string{}, - DoSample: nil, - MaxTokens: nil, - Temperature: nil, - TopP: nil, - } - // Non-stream response - response, errorCode, err := h.modelProviderService.ChatToModel(providerName, instanceName, modelName, userID, req.Message, &apiConfig, &chatConfig) + response, errorCode, err := h.modelProviderService.ChatToModel(providerName, instanceName, req.ModelName, userID, req.Message, &apiConfig, &chatConfig) if err != nil { c.JSON(http.StatusOK, gin.H{ "code": errorCode, diff --git a/internal/router/router.go b/internal/router/router.go index 18e1ccaaa1..64123ff0a3 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -218,7 +218,7 @@ func (r *Router) Setup(engine *gin.Engine) { provider.DELETE("/:provider_name/instances", r.providerHandler.DropProviderInstance) provider.GET("/:provider_name/instances/:instance_name/models", r.providerHandler.ListInstanceModels) provider.PATCH("/:provider_name/instances/:instance_name/models/:model_name", r.providerHandler.EnableOrDisableModel) - provider.POST("/:provider_name/instances/:instance_name/models/:model_name", r.providerHandler.ChatToModel) + provider.POST("/:provider_name/instances/:instance_name/models", r.providerHandler.ChatToModel) } model := v1.Group("/models") diff --git a/internal/service/model_service.go b/internal/service/model_service.go index 3862bd4e2f..e853789a71 100644 --- a/internal/service/model_service.go +++ b/internal/service/model_service.go @@ -770,11 +770,14 @@ func (m *ModelProviderService) ChatToModel(providerName, instanceName, modelName return nil, common.CodeNotFound, errors.New("provider not found") } - _, err = dao.GetModelProviderManager().GetModelByName(providerName, modelName) + var model *entity.Model = nil + model, err = dao.GetModelProviderManager().GetModelByName(providerName, modelName) if err != nil { return nil, common.CodeNotFound, errors.New(fmt.Sprintf("provider %s model %s not found", providerName, modelName)) } + modelConfig.ModelSeries = model.Series + var extra map[string]string err = json.Unmarshal([]byte(instance.Extra), &extra) if err != nil {