Go: add gitee and siliconflow as model provider (#14336)

### What problem does this PR solve? As title ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-07-03 17:21:59 +08:00 · 2026-04-24 20:59:30 +08:00
parent e5cfe7fb8f
commit 1c244df90d
24 changed files with 1609 additions and 122 deletions
--- a/conf/models/deepseek.json
+++ b/conf/models/deepseek.json
@@ -7,17 +7,18 @@
    "chat": "chat/completions",
    "models": "models"
  },
+  "series": "deepseek",
  "models": [
    {
-      "name": "deepseek-chat",
-      "max_tokens": 128000,
+      "name": "deepseek-v4-flash",
+      "max_tokens": 1048576,
      "model_types": [
        "chat"
      ]
    },
    {
-      "name": "deepseek-reasoner",
-      "max_tokens": 128000,
+      "name": "deepseek-v4-pro",
+      "max_tokens": 1048576,
      "model_types": [
        "chat"
      ]
@@ -27,7 +28,15 @@
    "thinking": {
      "default_value": true,
      "supported_models": [
-        "deepseek-chat"
+        "deepseek-v4-pro",
+        "deepseek-v4-flash"
+      ]
+    },
+    "reasoning_effort": {
+      "default_value": "high",
+      "supported_modes": [
+        "deepseek-v4-pro",
+        "deepseek-v4-flash"
      ]
    }
  }
--- a/conf/models/gitee.json
+++ b/conf/models/gitee.json
@@ -0,0 +1,45 @@
+{
+  "name": "Gitee",
+  "url": {
+    "default": "https://api.moark.com/v1"
+  },
+  "url_suffix": {
+    "chat": "chat/completions",
+    "models": "models",
+    "status": "",
+    "balance": "tokens/packages/balance",
+    "embedding": "embedding",
+    "rerank": "rerank"
+  },
+  "models": [
+    {
+      "name": "qwen3-8b",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "qwen3-0.6b",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "glm-4.7-flash",
+      "max_tokens": 204800,
+      "model_types": [
+        "chat"
+      ]
+    }
+  ],
+  "features": {
+    "thinking": {
+      "default_value": true,
+      "supported_models": [
+        "deepseek-chat"
+      ]
+    }
+  }
+}
--- a/conf/models/minimax.json
+++ b/conf/models/minimax.json
@@ -9,6 +9,7 @@
    "tts": "v1/t2a_v2",
    "files": "v1/files/list"
  },
+  "series": "minimax",
  "models": [
    {
      "name": "minimax-m2.7",
--- a/conf/models/moonshot.json
+++ b/conf/models/moonshot.json
@@ -8,10 +8,11 @@
    "models": "models",
    "balance": "users/me/balance"
  },
+  "series": "kimi",
  "models": [
    {
      "name": "kimi-k2.6",
-      "max_tokens": 256000,
+      "max_tokens": 262144,
      "model_types": [
        "chat",
        "vision"
@@ -19,7 +20,7 @@
    },
    {
      "name": "kimi-k2.5",
-      "max_tokens": 256000,
+      "max_tokens": 262144,
      "model_types": [
        "chat",
        "vision"
--- a/conf/models/openai.json
+++ b/conf/models/openai.json
@@ -6,6 +6,7 @@
  "url_suffix": {
    "chat": "chat/completions"
  },
+  "series": "gpt",
  "models": [
    {
      "name": "gpt-5.2-pro",
@@ -102,30 +103,6 @@
        "chat"
      ]
    },
-    {
-      "name": "o3",
-      "max_tokens": 200000,
-      "model_types": [
-        "chat",
-        "vision"
-      ]
-    },
-    {
-      "name": "o4-mini",
-      "max_tokens": 200000,
-      "model_types": [
-        "chat",
-        "vision"
-      ]
-    },
-    {
-      "name": "o4-mini-high",
-      "max_tokens": 200000,
-      "model_types": [
-        "chat",
-        "vision"
-      ]
-    },
    {
      "name": "gpt-4o-mini",
      "max_tokens": 128000,
--- a/conf/models/siliconflow.json
+++ b/conf/models/siliconflow.json
@@ -1,26 +1,50 @@
 {
-  "name": "SILICONFLOW",
-  "tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,IMAGE2TEXT",
+  "name": "SiliconFlow",
  "url": {
    "default": "https://api.siliconflow.cn/v1"
  },
  "url_suffix": {
    "chat": "chat/completions",
-    "async_chat": "async/chat/completions",
-    "async_result": "async-result",
-    "embedding": "embedding",
+    "models": "models",
+    "embedding": "embeddings",
    "rerank": "rerank"
  },
  "models": [
+    {
+      "name": "qwen/qwen3-8b",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "qwen/qwen3.5-4b",
+      "max_tokens": 262144,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "tencent/hunyuan-mt-7b",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat"
+      ]
+    },
    {
      "name": "BAAI/bge-reranker-v2-m3",
      "max_tokens": 8192,
      "model_types": [
        "rerank"
-      ],
-      "features": {}
+      ]
    }
-  ]
+  ],
+  "features": {
+    "thinking": {
+      "default_value": true,
+      "supported_models": [
+        "deepseek-chat"
+      ]
+    }
+  }
 }
-
-
--- a/conf/models/xai.json
+++ b/conf/models/xai.json
@@ -6,6 +6,7 @@
  "url_suffix": {
    "chat": "chat/completions"
  },
+  "series": "grok",
  "models": [
    {
      "name": "grok-4",
--- a/conf/models/zhipu-ai.json
+++ b/conf/models/zhipu-ai.json
@@ -11,6 +11,7 @@
    "rerank": "rerank",
    "files": "files"
  },
+  "series": "glm",
  "models": [
    {
      "name": "glm-5.1",
--- a/internal/cli/http_client.go
+++ b/internal/cli/http_client.go
@@ -337,7 +337,7 @@ func (c *HTTPClient) RequestJSON(method, path string, useAPIBase bool, authKind
 }

 // RequestStream makes an HTTP request for SSE streaming and returns the response body reader
-func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (io.ReadCloser, float64, error) {
+func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (io.ReadCloser, error) {
 	url := c.BuildURL(path, useAPIBase)
 	mergedHeaders := c.Headers(authKind, headers)

@@ -345,7 +345,7 @@ func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKin
 	if jsonBody != nil {
 		jsonData, err := json.Marshal(jsonBody)
 		if err != nil {
-			return nil, 0, err
+			return nil, err
 		}
 		body = bytes.NewReader(jsonData)
 		if mergedHeaders == nil {
@@ -361,24 +361,22 @@ func (c *HTTPClient) RequestStream(method, path string, useAPIBase bool, authKin

 	req, err := http.NewRequest(method, url, body)
 	if err != nil {
-		return nil, 0, err
+		return nil, err
 	}

 	for k, v := range mergedHeaders {
 		req.Header.Set(k, v)
 	}

-	startTime := time.Now()
 	resp, err := c.client.Do(req)
 	if err != nil {
-		return nil, 0, err
+		return nil, err
 	}
-	duration := time.Since(startTime).Seconds()

 	if resp.StatusCode != http.StatusOK {
 		resp.Body.Close()
-		return nil, duration, fmt.Errorf("HTTP %d", resp.StatusCode)
+		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
 	}

-	return resp.Body, duration, nil
+	return resp.Body, nil
 }
--- a/internal/cli/lexer.go
+++ b/internal/cli/lexer.go
@@ -303,6 +303,22 @@ func (l *Lexer) lookupIdent(ident string) Token {
 		return Token{Type: TokenChat, Value: ident}
 	case "THINK":
 		return Token{Type: TokenThink, Value: ident}
+	case "EFFORT":
+		return Token{Type: TokenEffort, Value: ident}
+	case "VERBOSITY":
+		return Token{Type: TokenVerbosity, Value: ident}
+	case "NONE":
+		return Token{Type: TokenNone, Value: ident}
+	case "MINIMAL":
+		return Token{Type: TokenMinimal, Value: ident}
+	case "LOW":
+		return Token{Type: TokenLow, Value: ident}
+	case "MEDIUM":
+		return Token{Type: TokenMedium, Value: ident}
+	case "HIGH":
+		return Token{Type: TokenHigh, Value: ident}
+	case "MAX":
+		return Token{Type: TokenMax, Value: ident}
 	case "STREAM":
 		return Token{Type: TokenStream, Value: ident}
 	case "LS":
--- a/internal/cli/response.go
+++ b/internal/cli/response.go
@@ -140,6 +140,7 @@ func (r *NonStreamResponse) PrintOut() {
 			fmt.Printf("Thinking: %s\n", r.ReasoningContent)
 		}
 		fmt.Printf("Answer: %s\n", r.Answer)
+		fmt.Printf("Time: %f\n", r.Duration)
 	} else {
 		fmt.Println("ERROR")
 		fmt.Printf("%d, %s\n", r.Code, r.Message)
@@ -166,7 +167,9 @@ func (r *StreamMessageResponse) SetOutputFormat(format OutputFormat) {
 }

 func (r *StreamMessageResponse) PrintOut() {
-	if r.Code != 0 {
+	if r.Code == 0 {
+		fmt.Printf("Time: %f\n", r.Duration)
+	} else {
 		fmt.Println("ERROR")
 		fmt.Printf("%d, %s\n", r.Code, r.Message)
 	}
--- a/internal/cli/types.go
+++ b/internal/cli/types.go
@@ -117,6 +117,14 @@ const (
 	TokenUse
 	TokenCheck
 	TokenThink
+	TokenEffort
+	TokenVerbosity
+	TokenNone
+	TokenMinimal
+	TokenLow
+	TokenMedium
+	TokenHigh
+	TokenMax
 	TokenLS
 	TokenCat
 	TokenInsert
--- a/internal/cli/user_command.go
+++ b/internal/cli/user_command.go
@@ -24,6 +24,7 @@ import (
 	"os"
 	ce "ragflow/internal/cli/contextengine"
 	"strings"
+	"time"
 )

 // PingServer pings the server to check if it's alive
@@ -1460,13 +1461,13 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {

 	// Check if composite_model_name is provided in command
 	if compositeModelName, ok := cmd.Params["composite_model_name"].(string); ok && compositeModelName != "" {
-		names := strings.Split(compositeModelName, "/")
+		names := strings.Split(compositeModelName, "@")
 		if len(names) != 3 {
-			return nil, fmt.Errorf("model name must be in format 'provider/instance/model'")
+			return nil, fmt.Errorf("model name must be in format 'model@instance@provider'")
 		}
-		providerName = names[0]
+		providerName = names[2]
 		instanceName = names[1]
-		modelName = names[2]
+		modelName = names[0]
 	} else if c.CurrentModel != nil {
 		// Use current model if set
 		providerName = c.CurrentModel.Provider
@@ -1479,18 +1480,27 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
 	message := cmd.Params["message"].(string)
 	thinking := cmd.Params["thinking"].(bool)
 	stream := cmd.Params["stream"].(bool)
+	effort := cmd.Params["effort"].(string)
+	verbosity := cmd.Params["verbosity"].(string)

-	url := fmt.Sprintf("/providers/%s/instances/%s/models/%s", providerName, instanceName, modelName)
+	url := fmt.Sprintf("/providers/%s/instances/%s/models", providerName, instanceName)

 	payload := map[string]interface{}{
-		"message":  message,
-		"stream":   stream, // use stream API
-		"thinking": thinking,
+		"model_name": modelName,
+		"message":    message,
+		"stream":     stream, // use stream API
+		"thinking":   thinking,
+	}
+
+	if thinking {
+		payload["effort"] = effort
+		payload["verbosity"] = verbosity
 	}

 	if stream {
 		// Call stream http api
-		reader, duration, err := c.HTTPClient.RequestStream("POST", url, true, "web", nil, payload)
+		startTime := time.Now()
+		reader, err := c.HTTPClient.RequestStream("POST", url, true, "web", nil, payload)
 		if err != nil {
 			return nil, fmt.Errorf("failed to chat model: %w", err)
 		}
@@ -1513,6 +1523,7 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
 					if reasoningPrint {
 						fmt.Print("Thinking: ")
 						reasoningPrint = false
+						thinking = true
 					} else {
 						fmt.Print(data)
 					}
@@ -1543,7 +1554,7 @@ func (c *RAGFlowClient) ChatToModel(cmd *Command) (ResponseIf, error) {
 				return nil, fmt.Errorf("chat error: received error event from server")
 			}
 		}
-
+		duration := time.Since(startTime).Seconds()
 		if err := scanner.Err(); err != nil {
 			return nil, fmt.Errorf("error reading stream: %w", err)
 		}
@@ -1633,15 +1644,15 @@ func (c *RAGFlowClient) UseModel(cmd *Command) (ResponseIf, error) {
 		return nil, fmt.Errorf("model identifier not provided")
 	}

-	names := strings.Split(compositeModelName, "/")
+	names := strings.Split(compositeModelName, "@")
 	if len(names) != 3 {
-		return nil, fmt.Errorf("model identifier must be in format 'provider/instance/model'")
+		return nil, fmt.Errorf("model identifier must be in format 'model@instance@provider'")
 	}

 	c.CurrentModel = &CurrentModel{
-		Provider: names[0],
+		Provider: names[2],
 		Instance: names[1],
-		Model:    names[2],
+		Model:    names[0],
 	}

 	var result SimpleResponse
--- a/internal/cli/user_parser.go
+++ b/internal/cli/user_parser.go
@@ -2241,12 +2241,12 @@ func (p *Parser) parseChatCommand() (*Command, error) {
 	var message string

 	// Check if we have a quoted string that looks like a model identifier (contains two slashes)
-	// Format: 'provider/instance/model' or just 'message'
+	// Format: 'model@instance@provider' or just 'message'
 	if p.curToken.Type == TokenQuotedString {
 		firstArg := p.curToken.Value

 		// Check if it looks like a model identifier (contains exactly 2 slashes)
-		slashCount := strings.Count(firstArg, "/")
+		slashCount := strings.Count(firstArg, "@")
 		if slashCount == 2 {
 			// This is likely a model identifier, expect another quoted string for message
 			compositeModelName = firstArg
@@ -2271,18 +2271,69 @@ func (p *Parser) parseChatCommand() (*Command, error) {
 		return nil, fmt.Errorf("expected model name (quoted string) or message")
 	}

+	cmd := NewCommand("chat_to_model")
+
+	effort := "default"
+	verbosity := "low"
+	if p.curToken.Type == TokenWith {
+		p.nextToken() // pass WITH
+		switch p.curToken.Type {
+		case TokenEffort:
+			{
+				p.nextToken() // pass VERBOSITY
+				switch p.curToken.Type {
+				case TokenNone:
+					effort = "none"
+				case TokenMinimal:
+					effort = "minimal"
+				case TokenLow:
+					effort = "low"
+				case TokenMedium:
+					effort = "medium"
+				case TokenHigh:
+					effort = "high"
+				case TokenMax:
+					effort = "max"
+				default:
+					return nil, fmt.Errorf("invalid effort level")
+				}
+				p.nextToken()
+				break
+			}
+		case TokenVerbosity:
+			{
+				p.nextToken() // pass VERBOSITY
+				switch p.curToken.Type {
+				case TokenLow:
+					verbosity = "low"
+				case TokenMedium:
+					verbosity = "median"
+				case TokenHigh:
+					verbosity = "high"
+				default:
+					return nil, fmt.Errorf("invalid verbosity level")
+				}
+				p.nextToken()
+				break
+			}
+		default:
+			return nil, fmt.Errorf("expected VERBOSITY or EFFORT")
+		}
+	}
+
 	// Semicolon is optional
 	if p.curToken.Type == TokenSemicolon {
 		p.nextToken()
 	}

-	cmd := NewCommand("chat_to_model")
 	if compositeModelName != "" {
 		cmd.Params["composite_model_name"] = compositeModelName
 	}
 	cmd.Params["message"] = message
 	cmd.Params["thinking"] = false
 	cmd.Params["stream"] = false
+	cmd.Params["effort"] = effort
+	cmd.Params["verbosity"] = verbosity
 	return cmd, nil
 }

@@ -2369,10 +2420,10 @@ func (p *Parser) parseUseCommand() (*Command, error) {
 	}
 	p.nextToken() // consume MODEL

-	// Parse model identifier in format 'provider/instance/model'
+	// Parse model identifier in format 'model@instance@provider'
 	compositeModelName, err := p.parseQuotedString()
 	if err != nil {
-		return nil, fmt.Errorf("expected model identifier in format 'provider/instance/model': %w", err)
+		return nil, fmt.Errorf("expected model identifier in format 'model@instance@provider': %w", err)
 	}
 	p.nextToken()

--- a/internal/entity/model.go
+++ b/internal/entity/model.go
@@ -159,6 +159,7 @@ type Model struct {
 	MaxTokens    int            `json:"max_tokens"`
 	ModelTypes   []string       `json:"model_types"`
 	Thinking     *ModelThinking `json:"thinking"`
+	Series       *string        `json:"series"`
 	ModelTypeMap map[string]bool
 }

@@ -169,6 +170,7 @@ type Provider struct {
 	URLSuffix   models.URLSuffix  `json:"url_suffix"`
 	Models      []*Model          `json:"models"`
 	Features    Features          `json:"features"`
+	Series      string            `json:"series"`
 	ModelDriver models.ModelDriver
 }

@@ -255,6 +257,14 @@ func NewProviderManager(dirPath string) (*ProviderManager, error) {
 				}
 			}

+			if provider.Series == "" {
+				pos := strings.Index(model.Name, "-")
+				modelSeries := model.Name[0:pos]
+				model.Series = &modelSeries
+			} else {
+				model.Series = &provider.Name
+			}
+
 			model.ModelTypeMap = make(map[string]bool)
 			for _, modelType := range model.ModelTypes {
 				model.ModelTypeMap[modelType] = true
--- a/internal/entity/models/common.go
+++ b/internal/entity/models/common.go
@@ -0,0 +1,47 @@
+//
+//  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+//
+
+package models
+
+import "strings"
+
+func GetThinkingAndAnswer(modelSeries *string, content *string) (*string, *string) {
+	switch *modelSeries {
+	case "qwen3":
+		return extractThinkContent(content)
+	}
+	return nil, content
+}
+
+func extractThinkContent(content *string) (*string, *string) {
+	startTag := "<think>"
+	endTag := "</think>"
+
+	startIdx := strings.Index(*content, startTag)
+	endIdx := strings.Index(*content, endTag)
+
+	if startIdx == -1 || endIdx == -1 || endIdx <= startIdx {
+		return nil, content
+	}
+
+	thinking := (*content)[startIdx+len(startTag) : endIdx]
+	answer := (*content)[endIdx+len(endTag):]
+
+	thinking = strings.TrimLeft(thinking, "\n")
+	answer = strings.TrimLeft(answer, "\n")
+
+	return &thinking, &answer
+}
--- a/internal/entity/models/deepseek.go
+++ b/internal/entity/models/deepseek.go
@@ -17,11 +17,14 @@
 package models

 import (
+	"bufio"
 	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
+	"ragflow/internal/logger"
+	"strings"
 	"time"
 )

@@ -55,7 +58,160 @@ func (z *DeepSeekModel) Name() string {

 // Chat sends a message and returns response
 func (z *DeepSeekModel) Chat(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
-	return nil, fmt.Errorf("%s, no such method", z.Name())
+	if message == nil {
+		return nil, fmt.Errorf("message is nil")
+	}
+
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat)
+
+	// Build request body
+	reqBody := map[string]interface{}{
+		"model": modelName,
+		"messages": []map[string]string{
+			{"role": "user", "content": *message},
+		},
+		"stream":      false,
+		"temperature": 1,
+	}
+
+	if chatModelConfig.Stream != nil {
+		reqBody["stream"] = *chatModelConfig.Stream
+	}
+
+	if chatModelConfig.MaxTokens != nil {
+		reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+	}
+
+	if chatModelConfig.Temperature != nil {
+		reqBody["temperature"] = *chatModelConfig.Temperature
+	}
+
+	if chatModelConfig.TopP != nil {
+		reqBody["top_p"] = *chatModelConfig.TopP
+	}
+
+	if chatModelConfig.Stop != nil {
+		reqBody["stop"] = *chatModelConfig.Stop
+	}
+
+	if chatModelConfig.Thinking != nil {
+		if *chatModelConfig.Thinking {
+			var thinkingFlag string
+			switch *chatModelConfig.Effort {
+			case "none":
+				thinkingFlag = "disabled"
+				chatModelConfig.Thinking = nil
+				break
+			case "low":
+				thinkingFlag = "disabled"
+				chatModelConfig.Thinking = nil
+				break
+			case "medium":
+				thinkingFlag = "disabled"
+				chatModelConfig.Thinking = nil
+				break
+			case "high":
+				thinkingFlag = "enabled"
+				reqBody["reasoning_effort"] = "high"
+				break
+			case "default":
+				thinkingFlag = "enabled"
+				reqBody["reasoning_effort"] = "high"
+				break
+			case "max":
+				thinkingFlag = "enabled"
+				reqBody["reasoning_effort"] = "max"
+				break
+			default:
+				return nil, fmt.Errorf("invalid effort level")
+			}
+			reqBody["thinking"] = map[string]interface{}{
+				"type": thinkingFlag,
+			}
+		} else {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "disabled",
+			}
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Parse response
+	var result map[string]interface{}
+	if err = json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	choices, ok := result["choices"].([]interface{})
+	if !ok || len(choices) == 0 {
+		return nil, fmt.Errorf("no choices in response")
+	}
+
+	firstChoice, ok := choices[0].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid choice format")
+	}
+
+	messageMap, ok := firstChoice["message"].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid message format")
+	}
+
+	content, ok := messageMap["content"].(string)
+	if !ok {
+		return nil, fmt.Errorf("invalid content format")
+	}
+
+	var reasonContent string
+	if chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
+		reasonContent, ok = messageMap["reasoning_content"].(string)
+		if !ok {
+			return nil, fmt.Errorf("invalid content format")
+		}
+		// if first char of reasonContent is \n remove the '\n'
+		if reasonContent != "" && reasonContent[0] == '\n' {
+			reasonContent = reasonContent[1:]
+		}
+	}
+
+	chatResponse := &ChatResponse{
+		Answer:        &content,
+		ReasonContent: &reasonContent,
+	}
+
+	return chatResponse, nil
 }

 // ChatWithMessages sends multiple messages with roles and returns response
@@ -65,7 +221,179 @@ func (z *DeepSeekModel) ChatWithMessages(modelName string, apiKey *string, messa

 // ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
 func (z *DeepSeekModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
-	return nil
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region])
+
+	// Build request body with streaming enabled
+	reqBody := map[string]interface{}{
+		"model": modelName,
+		"messages": []map[string]string{
+			{"role": "user", "content": *message},
+		},
+		"stream":      false,
+		"temperature": 1,
+	}
+
+	if chatModelConfig.Stream != nil {
+		reqBody["stream"] = *chatModelConfig.Stream
+	}
+
+	if chatModelConfig.MaxTokens != nil {
+		reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+	}
+
+	if chatModelConfig.Temperature != nil {
+		reqBody["temperature"] = *chatModelConfig.Temperature
+	}
+
+	if chatModelConfig.DoSample != nil {
+		reqBody["do_sample"] = *chatModelConfig.DoSample
+	}
+
+	if chatModelConfig.TopP != nil {
+		reqBody["top_p"] = *chatModelConfig.TopP
+	}
+
+	if chatModelConfig.Stop != nil {
+		reqBody["stop"] = *chatModelConfig.Stop
+	}
+
+	if chatModelConfig.Thinking != nil {
+		if *chatModelConfig.Thinking {
+			var thinkingFlag string
+			switch *chatModelConfig.Effort {
+			case "none":
+				thinkingFlag = "disabled"
+				chatModelConfig.Thinking = nil
+				break
+			case "low":
+				thinkingFlag = "disabled"
+				chatModelConfig.Thinking = nil
+				break
+			case "medium":
+				thinkingFlag = "disabled"
+				chatModelConfig.Thinking = nil
+				break
+			case "high":
+				thinkingFlag = "enabled"
+				reqBody["reasoning_effort"] = "high"
+				break
+			case "default":
+				thinkingFlag = "enabled"
+				reqBody["reasoning_effort"] = "high"
+				break
+			case "max":
+				thinkingFlag = "enabled"
+				reqBody["reasoning_effort"] = "max"
+				break
+			default:
+				return fmt.Errorf("invalid effort level")
+			}
+			reqBody["thinking"] = map[string]interface{}{
+				"type": thinkingFlag,
+			}
+		} else {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "disabled",
+			}
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// SSE parsing: read line by line
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		line := scanner.Text()
+		logger.Info(line)
+
+		// SSE data line starts with "data:"
+		if !strings.HasPrefix(line, "data:") {
+			continue
+		}
+
+		// Extract JSON after "data:"
+		data := strings.TrimSpace(line[5:])
+
+		// [DONE] marks the end of stream
+		if data == "[DONE]" {
+			break
+		}
+
+		// Parse the JSON event
+		var event map[string]interface{}
+		if err = json.Unmarshal([]byte(data), &event); err != nil {
+			continue
+		}
+
+		choices, ok := event["choices"].([]interface{})
+		if !ok || len(choices) == 0 {
+			continue
+		}
+
+		firstChoice, ok := choices[0].(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		delta, ok := firstChoice["delta"].(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		content, ok := delta["content"].(string)
+		if ok && content != "" {
+			if err := sender(&content, nil); err != nil {
+				return err
+			}
+		}
+
+		reasoningContent, ok := delta["reasoning_content"].(string)
+		if ok && reasoningContent != "" {
+			if err := sender(nil, &reasoningContent); err != nil {
+				return err
+			}
+		}
+
+		finishReason, ok := firstChoice["finish_reason"].(string)
+		if ok && finishReason != "" {
+			break
+		}
+	}
+
+	// Send [DONE] marker for OpenAI compatibility
+	endOfStream := "[DONE]"
+	if err = sender(&endOfStream, nil); err != nil {
+		return err
+	}
+
+	return scanner.Err()
 }

 // EncodeToEmbedding encodes a list of texts into embeddings
@@ -73,15 +401,15 @@ func (z *DeepSeekModel) EncodeToEmbedding(modelName *string, texts []string, api
 	return nil, fmt.Errorf("%s, no such method", z.Name())
 }

-type Model struct {
+type DSModel struct {
 	ID      string `json:"id"`
 	Object  string `json:"object"`
 	OwnedBy string `json:"owned_by"`
 }

-type ModelList struct {
-	Object string  `json:"object"`
-	Models []Model `json:"data"`
+type DSModelList struct {
+	Object string    `json:"object"`
+	Models []DSModel `json:"data"`
 }

 func (z *DeepSeekModel) ListModels(apiConfig *APIConfig) ([]string, error) {
@@ -124,7 +452,7 @@ func (z *DeepSeekModel) ListModels(apiConfig *APIConfig) ([]string, error) {
 	}

 	// Parse response
-	var modelList ModelList
+	var modelList DSModelList
 	if err = json.Unmarshal(body, &modelList); err != nil {
 		return nil, fmt.Errorf("failed to parse response: %w", err)
 	}
--- a/internal/entity/models/factory.go
+++ b/internal/entity/models/factory.go
@@ -41,6 +41,10 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string
 		return NewMoonshotModel(baseURL, urlSuffix), nil
 	case "minimax":
 		return NewMinimaxModel(baseURL, urlSuffix), nil
+	case "gitee":
+		return NewGiteeModel(baseURL, urlSuffix), nil
+	case "siliconflow":
+		return NewSiliconflowModel(baseURL, urlSuffix), nil
 	default:
 		return NewDummyModel(baseURL, urlSuffix), nil
 	}
--- a/internal/entity/models/gitee.go
+++ b/internal/entity/models/gitee.go
@@ -0,0 +1,522 @@
+//
+//  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+//
+
+package models
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"ragflow/internal/logger"
+	"strings"
+	"time"
+)
+
+// GiteeModel implements ModelDriver for Gitee
+type GiteeModel struct {
+	BaseURL    map[string]string
+	URLSuffix  URLSuffix
+	httpClient *http.Client // Reusable HTTP client with connection pool
+}
+
+// NewGiteeModel creates a new Gitee model instance
+func NewGiteeModel(baseURL map[string]string, urlSuffix URLSuffix) *GiteeModel {
+	return &GiteeModel{
+		BaseURL:   baseURL,
+		URLSuffix: urlSuffix,
+		httpClient: &http.Client{
+			Timeout: 120 * time.Second,
+			Transport: &http.Transport{
+				MaxIdleConns:        100,
+				MaxIdleConnsPerHost: 10,
+				IdleConnTimeout:     90 * time.Second,
+				DisableCompression:  false,
+			},
+		},
+	}
+}
+
+func (z *GiteeModel) Name() string {
+	return "gitee"
+}
+
+// Chat sends a message and returns response
+func (z *GiteeModel) Chat(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
+	if message == nil {
+		return nil, fmt.Errorf("message is nil")
+	}
+
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat)
+
+	// I need to get the model series, such as qwen3 is the prefix, the model series will be qwen. glm is the prefix, the model series will be glm. such as the model name: qwen3-0.6b, the model series will be qwen3
+	// the model name is glm-4.7, the model series will be glm
+	modelSeries := strings.Split(*modelName, "-")[0]
+	if modelSeries == "qwen" || modelSeries == "glm" {
+		url = fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.AsyncChat)
+	}
+
+	// Build request body
+	reqBody := map[string]interface{}{
+		"model": modelName,
+		"messages": []map[string]string{
+			{"role": "user", "content": *message},
+		},
+		"stream":      false,
+		"temperature": 1,
+	}
+
+	if chatModelConfig.Stream != nil {
+		reqBody["stream"] = *chatModelConfig.Stream
+	}
+
+	if chatModelConfig.MaxTokens != nil {
+		reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+	}
+
+	if chatModelConfig.Temperature != nil {
+		reqBody["temperature"] = *chatModelConfig.Temperature
+	}
+
+	if chatModelConfig.TopP != nil {
+		reqBody["top_p"] = *chatModelConfig.TopP
+	}
+
+	if chatModelConfig.Stop != nil {
+		reqBody["stop"] = *chatModelConfig.Stop
+	}
+
+	if chatModelConfig.Thinking != nil {
+		if *chatModelConfig.Thinking {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "enabled",
+			}
+		} else {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "disabled",
+			}
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Parse response
+	var result map[string]interface{}
+	if err = json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	choices, ok := result["choices"].([]interface{})
+	if !ok || len(choices) == 0 {
+		return nil, fmt.Errorf("no choices in response")
+	}
+
+	firstChoice, ok := choices[0].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid choice format")
+	}
+
+	messageMap, ok := firstChoice["message"].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid message format")
+	}
+
+	content, ok := messageMap["content"].(string)
+	if !ok {
+		return nil, fmt.Errorf("invalid content format")
+	}
+
+	thinking, answer := GetThinkingAndAnswer(chatModelConfig.ModelSeries, &content)
+
+	chatResponse := &ChatResponse{
+		Answer:        answer,
+		ReasonContent: thinking,
+	}
+
+	return chatResponse, nil
+}
+
+// ChatWithMessages sends multiple messages with roles and returns response
+func (z *GiteeModel) ChatWithMessages(modelName string, apiKey *string, messages []Message, chatModelConfig *ChatConfig) (string, error) {
+	return "", fmt.Errorf("%s, ChatWithMessages not implemented", z.Name())
+}
+
+// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
+func (z *GiteeModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region])
+
+	// Build request body with streaming enabled
+	reqBody := map[string]interface{}{
+		"model": modelName,
+		"messages": []map[string]string{
+			{"role": "user", "content": *message},
+		},
+		"stream":      false,
+		"temperature": 1,
+	}
+
+	if chatModelConfig.Stream != nil {
+		reqBody["stream"] = *chatModelConfig.Stream
+	}
+
+	if chatModelConfig.MaxTokens != nil {
+		reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+	}
+
+	if chatModelConfig.Temperature != nil {
+		reqBody["temperature"] = *chatModelConfig.Temperature
+	}
+
+	if chatModelConfig.DoSample != nil {
+		reqBody["do_sample"] = *chatModelConfig.DoSample
+	}
+
+	if chatModelConfig.TopP != nil {
+		reqBody["top_p"] = *chatModelConfig.TopP
+	}
+
+	if chatModelConfig.Stop != nil {
+		reqBody["stop"] = *chatModelConfig.Stop
+	}
+
+	if chatModelConfig.Thinking != nil {
+		if *chatModelConfig.Thinking {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "enabled",
+			}
+		} else {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "disabled",
+			}
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	reserveText := ""
+	thinkingPhase := false
+	answerPhase := false
+
+	// SSE parsing: read line by line
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		line := scanner.Text()
+		logger.Info(line)
+
+		// SSE data line starts with "data:"
+		if !strings.HasPrefix(line, "data:") {
+			continue
+		}
+
+		// Extract JSON after "data:"
+		data := strings.TrimSpace(line[5:])
+
+		// [DONE] marks the end of stream
+		if data == "[DONE]" {
+			break
+		}
+
+		// Parse the JSON event
+		var event map[string]interface{}
+		if err = json.Unmarshal([]byte(data), &event); err != nil {
+			continue
+		}
+
+		choices, ok := event["choices"].([]interface{})
+		if !ok || len(choices) == 0 {
+			continue
+		}
+
+		firstChoice, ok := choices[0].(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		delta, ok := firstChoice["delta"].(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		content, ok := delta["content"].(string)
+		if ok && content != "" {
+			if content == "<think>" {
+				thinkingPhase = true
+				continue
+
+			} else if content == "</think>" {
+				thinkingPhase = false
+				answerPhase = true
+				continue
+			}
+
+			if thinkingPhase {
+				if err = sender(nil, &content); err != nil {
+					return err
+				}
+				reserveText = ""
+			} else if answerPhase {
+				if err = sender(&content, nil); err != nil {
+					return err
+				}
+				reserveText = ""
+			} else {
+				content = strings.Trim(content, "\n")
+				content = strings.Trim(content, " ")
+				if content != "" {
+					reserveText += content
+				}
+			}
+		}
+
+		finishReason, ok := firstChoice["finish_reason"].(string)
+		if ok && finishReason != "" {
+			break
+		}
+	}
+
+	if reserveText != "" {
+		if err = sender(&reserveText, nil); err != nil {
+			return err
+		}
+	}
+
+	// Send [DONE] marker for OpenAI compatibility
+	endOfStream := "[DONE]"
+	if err = sender(&endOfStream, nil); err != nil {
+		return err
+	}
+
+	return scanner.Err()
+}
+
+// EncodeToEmbedding encodes a list of texts into embeddings
+func (z *GiteeModel) EncodeToEmbedding(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) {
+	return nil, fmt.Errorf("%s, no such method", z.Name())
+}
+
+func (z *GiteeModel) ListModels(apiConfig *APIConfig) ([]string, error) {
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Models)
+
+	// Build request body
+	reqBody := map[string]interface{}{}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Parse response
+	var modelList DSModelList
+	if err = json.Unmarshal(body, &modelList); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	var models []string
+	for _, model := range modelList.Models {
+		modelName := model.ID
+		if model.OwnedBy != "" {
+			modelName = model.ID + "@" + model.OwnedBy
+		}
+		models = append(models, modelName)
+	}
+
+	return models, nil
+}
+
+func (z *GiteeModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Balance)
+
+	// Build request body
+	reqBody := map[string]interface{}{}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Parse response
+	var result map[string]interface{}
+	if err = json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	balance := result["balance"].(float64)
+
+	var response = map[string]interface{}{
+		"balance":  balance,
+		"currency": "CNY",
+	}
+
+	return response, nil
+}
+
+func (z *GiteeModel) CheckConnection(apiConfig *APIConfig) error {
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Status)
+
+	// Build request body
+	reqBody := map[string]interface{}{}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	return nil
+}
--- a/internal/entity/models/siliconflow.go
+++ b/internal/entity/models/siliconflow.go
@@ -0,0 +1,437 @@
+//
+//  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+//
+
+package models
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"ragflow/internal/logger"
+	"strings"
+	"time"
+)
+
+// SiliconflowModel implements ModelDriver for Siliconflow
+type SiliconflowModel struct {
+	BaseURL    map[string]string
+	URLSuffix  URLSuffix
+	httpClient *http.Client // Reusable HTTP client with connection pool
+}
+
+// NewSiliconflowModel creates a new Siliconflow model instance
+func NewSiliconflowModel(baseURL map[string]string, urlSuffix URLSuffix) *SiliconflowModel {
+	return &SiliconflowModel{
+		BaseURL:   baseURL,
+		URLSuffix: urlSuffix,
+		httpClient: &http.Client{
+			Timeout: 120 * time.Second,
+			Transport: &http.Transport{
+				MaxIdleConns:        100,
+				MaxIdleConnsPerHost: 10,
+				IdleConnTimeout:     90 * time.Second,
+				DisableCompression:  false,
+			},
+		},
+	}
+}
+
+func (z *SiliconflowModel) Name() string {
+	return "siliconflow"
+}
+
+// Chat sends a message and returns response
+func (z *SiliconflowModel) Chat(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
+	if message == nil {
+		return nil, fmt.Errorf("message is nil")
+	}
+
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Chat)
+
+	// I need to get the model series, such as qwen3 is the prefix, the model series will be qwen. glm is the prefix, the model series will be glm. such as the model name: qwen3-0.6b, the model series will be qwen3
+	// the model name is glm-4.7, the model series will be glm
+	modelSeries := strings.Split(*modelName, "-")[0]
+	if modelSeries == "qwen" || modelSeries == "glm" {
+		url = fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.AsyncChat)
+	}
+
+	// Build request body
+	reqBody := map[string]interface{}{
+		"model": modelName,
+		"messages": []map[string]string{
+			{"role": "user", "content": *message},
+		},
+		"stream":      false,
+		"temperature": 1,
+	}
+
+	if chatModelConfig.Stream != nil {
+		reqBody["stream"] = *chatModelConfig.Stream
+	}
+
+	if chatModelConfig.MaxTokens != nil {
+		reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+	}
+
+	if chatModelConfig.Temperature != nil {
+		reqBody["temperature"] = *chatModelConfig.Temperature
+	}
+
+	if chatModelConfig.TopP != nil {
+		reqBody["top_p"] = *chatModelConfig.TopP
+	}
+
+	if chatModelConfig.Stop != nil {
+		reqBody["stop"] = *chatModelConfig.Stop
+	}
+
+	if chatModelConfig.Thinking != nil {
+		if *chatModelConfig.Thinking {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "enabled",
+			}
+		} else {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "disabled",
+			}
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Parse response
+	var result map[string]interface{}
+	if err = json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	choices, ok := result["choices"].([]interface{})
+	if !ok || len(choices) == 0 {
+		return nil, fmt.Errorf("no choices in response")
+	}
+
+	firstChoice, ok := choices[0].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid choice format")
+	}
+
+	messageMap, ok := firstChoice["message"].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid message format")
+	}
+
+	content, ok := messageMap["content"].(string)
+	if !ok {
+		return nil, fmt.Errorf("invalid content format")
+	}
+
+	thinking, answer := GetThinkingAndAnswer(chatModelConfig.ModelSeries, &content)
+
+	chatResponse := &ChatResponse{
+		Answer:        answer,
+		ReasonContent: thinking,
+	}
+
+	return chatResponse, nil
+}
+
+// ChatWithMessages sends multiple messages with roles and returns response
+func (z *SiliconflowModel) ChatWithMessages(modelName string, apiKey *string, messages []Message, chatModelConfig *ChatConfig) (string, error) {
+	return "", fmt.Errorf("%s, ChatWithMessages not implemented", z.Name())
+}
+
+// ChatStreamlyWithSender sends a message and streams response via sender function (best performance, no channel)
+func (z *SiliconflowModel) ChatStreamlyWithSender(modelName, message *string, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/chat/completions", z.BaseURL[region])
+
+	// Build request body with streaming enabled
+	reqBody := map[string]interface{}{
+		"model": modelName,
+		"messages": []map[string]string{
+			{"role": "user", "content": *message},
+		},
+		"stream":      false,
+		"temperature": 1,
+	}
+
+	if chatModelConfig.Stream != nil {
+		reqBody["stream"] = *chatModelConfig.Stream
+	}
+
+	if chatModelConfig.MaxTokens != nil {
+		reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+	}
+
+	if chatModelConfig.Temperature != nil {
+		reqBody["temperature"] = *chatModelConfig.Temperature
+	}
+
+	if chatModelConfig.DoSample != nil {
+		reqBody["do_sample"] = *chatModelConfig.DoSample
+	}
+
+	if chatModelConfig.TopP != nil {
+		reqBody["top_p"] = *chatModelConfig.TopP
+	}
+
+	if chatModelConfig.Stop != nil {
+		reqBody["stop"] = *chatModelConfig.Stop
+	}
+
+	if chatModelConfig.Thinking != nil {
+		if *chatModelConfig.Thinking {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "enabled",
+			}
+		} else {
+			reqBody["thinking"] = map[string]interface{}{
+				"type": "disabled",
+			}
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	reserveText := ""
+	thinkingPhase := false
+	answerPhase := false
+
+	// SSE parsing: read line by line
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		line := scanner.Text()
+		logger.Info(line)
+
+		// SSE data line starts with "data:"
+		if !strings.HasPrefix(line, "data:") {
+			continue
+		}
+
+		// Extract JSON after "data:"
+		data := strings.TrimSpace(line[5:])
+
+		// [DONE] marks the end of stream
+		if data == "[DONE]" {
+			break
+		}
+
+		// Parse the JSON event
+		var event map[string]interface{}
+		if err = json.Unmarshal([]byte(data), &event); err != nil {
+			continue
+		}
+
+		choices, ok := event["choices"].([]interface{})
+		if !ok || len(choices) == 0 {
+			continue
+		}
+
+		firstChoice, ok := choices[0].(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		delta, ok := firstChoice["delta"].(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		content, ok := delta["content"].(string)
+		if ok && content != "" {
+			if content == "<think>" {
+				thinkingPhase = true
+				continue
+
+			} else if content == "</think>" {
+				thinkingPhase = false
+				answerPhase = true
+				continue
+			}
+
+			if thinkingPhase {
+				if err = sender(nil, &content); err != nil {
+					return err
+				}
+				reserveText = ""
+			} else if answerPhase {
+				if err = sender(&content, nil); err != nil {
+					return err
+				}
+				reserveText = ""
+			} else {
+				content = strings.Trim(content, "\n")
+				content = strings.Trim(content, " ")
+				if content != "" {
+					reserveText += content
+				}
+			}
+		}
+
+		finishReason, ok := firstChoice["finish_reason"].(string)
+		if ok && finishReason != "" {
+			break
+		}
+	}
+
+	if reserveText != "" {
+		if err = sender(&reserveText, nil); err != nil {
+			return err
+		}
+	}
+
+	// Send [DONE] marker for OpenAI compatibility
+	endOfStream := "[DONE]"
+	if err = sender(&endOfStream, nil); err != nil {
+		return err
+	}
+
+	return scanner.Err()
+}
+
+// EncodeToEmbedding encodes a list of texts into embeddings
+func (z *SiliconflowModel) EncodeToEmbedding(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) {
+	return nil, fmt.Errorf("%s, no such method", z.Name())
+}
+
+func (z *SiliconflowModel) ListModels(apiConfig *APIConfig) ([]string, error) {
+	var region = "default"
+	if apiConfig.Region != nil {
+		region = *apiConfig.Region
+	}
+
+	url := fmt.Sprintf("%s/%s", z.BaseURL[region], z.URLSuffix.Models)
+
+	// Build request body
+	reqBody := map[string]interface{}{}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	req, err := http.NewRequest("GET", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := z.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Parse response
+	var modelList DSModelList
+	if err = json.Unmarshal(body, &modelList); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	var models []string
+	for _, model := range modelList.Models {
+		modelName := model.ID
+		if model.OwnedBy != "" {
+			modelName = model.ID + "@" + model.OwnedBy
+		}
+		models = append(models, modelName)
+	}
+
+	return models, nil
+}
+
+func (z *SiliconflowModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
+	return nil, fmt.Errorf("%s, no such method", z.Name())
+}
+
+func (z *SiliconflowModel) CheckConnection(apiConfig *APIConfig) error {
+	_, err := z.ListModels(apiConfig)
+	if err != nil {
+		return err
+	}
+	return nil
+}
--- a/internal/entity/models/types.go
+++ b/internal/entity/models/types.go
@@ -41,6 +41,7 @@ type URLSuffix struct {
 	Models      string `json:"models"`
 	Balance     string `json:"balance"`
 	Files       string `json:"files"`
+	Status      string `json:"status"`
 }

 type ChatConfig struct {
@@ -51,6 +52,9 @@ type ChatConfig struct {
 	TopP        *float64
 	DoSample    *bool
 	Stop        *[]string
+	ModelSeries *string
+	Effort      *string
+	Verbosity   *string
 }

 type APIConfig struct {
--- a/internal/handler/providers.go
+++ b/internal/handler/providers.go
@@ -643,9 +643,12 @@ func (h *ProviderHandler) EnableOrDisableModel(c *gin.Context) {
 }

 type ChatToModelRequest struct {
-	Message  string `json:"message" binding:"required"`
-	Stream   bool   `json:"stream"`
-	Thinking bool   `json:"thinking"`
+	ModelName string  `json:"model_name" binding:"required"`
+	Message   string  `json:"message" binding:"required"`
+	Stream    bool    `json:"stream"`
+	Thinking  bool    `json:"thinking"`
+	Effort    *string `json:"effort"`
+	Verbosity *string `json:"verbosity"`
 }

 func (h *ProviderHandler) ChatToModel(c *gin.Context) {
@@ -667,15 +670,6 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) {
 		return
 	}

-	modelName := c.Param("model_name")
-	if modelName == "" {
-		c.JSON(http.StatusBadRequest, gin.H{
-			"code":    400,
-			"message": "Model name is required",
-		})
-		return
-	}
-
 	var req ChatToModelRequest
 	if err := c.ShouldBindJSON(&req); err != nil {
 		println("JSON bind error: %v (type: %T)", err, err)
@@ -688,6 +682,28 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) {

 	userID := c.GetString("user_id")

+	if !req.Thinking {
+		req.Effort = nil
+		req.Verbosity = nil
+	}
+
+	apiConfig := models.APIConfig{
+		ApiKey: nil,
+		Region: nil,
+	}
+
+	chatConfig := models.ChatConfig{
+		Thinking:    &req.Thinking,
+		Stream:      &req.Stream,
+		Stop:        &[]string{},
+		DoSample:    nil,
+		MaxTokens:   nil,
+		Temperature: nil,
+		TopP:        nil,
+		Effort:      req.Effort,
+		Verbosity:   req.Verbosity,
+	}
+
 	// Check if it's a stream request
 	if req.Stream {
 		// Set SSE headers
@@ -720,23 +736,8 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) {
 			return nil
 		}

-		apiConfig := models.APIConfig{
-			ApiKey: nil,
-			Region: nil,
-		}
-
-		chatConfig := models.ChatConfig{
-			Thinking:    &req.Thinking,
-			Stream:      &req.Stream,
-			Stop:        &[]string{},
-			DoSample:    nil,
-			MaxTokens:   nil,
-			Temperature: nil,
-			TopP:        nil,
-		}
-
 		// Stream response using sender function (best performance, no channel)
-		errorCode := h.modelProviderService.ChatToModelStreamWithSender(providerName, instanceName, modelName, userID, req.Message, &apiConfig, &chatConfig, sender)
+		errorCode := h.modelProviderService.ChatToModelStreamWithSender(providerName, instanceName, req.ModelName, userID, req.Message, &apiConfig, &chatConfig, sender)

 		if errorCode != common.CodeSuccess {
 			c.SSEvent("error", "stream failed")
@@ -744,23 +745,8 @@ func (h *ProviderHandler) ChatToModel(c *gin.Context) {
 		return
 	}

-	apiConfig := models.APIConfig{
-		ApiKey: nil,
-		Region: nil,
-	}
-
-	chatConfig := models.ChatConfig{
-		Thinking:    &req.Thinking,
-		Stream:      &req.Stream,
-		Stop:        &[]string{},
-		DoSample:    nil,
-		MaxTokens:   nil,
-		Temperature: nil,
-		TopP:        nil,
-	}
-
 	// Non-stream response
-	response, errorCode, err := h.modelProviderService.ChatToModel(providerName, instanceName, modelName, userID, req.Message, &apiConfig, &chatConfig)
+	response, errorCode, err := h.modelProviderService.ChatToModel(providerName, instanceName, req.ModelName, userID, req.Message, &apiConfig, &chatConfig)
 	if err != nil {
 		c.JSON(http.StatusOK, gin.H{
 			"code":    errorCode,
--- a/internal/router/router.go
+++ b/internal/router/router.go
@@ -218,7 +218,7 @@ func (r *Router) Setup(engine *gin.Engine) {
 				provider.DELETE("/:provider_name/instances", r.providerHandler.DropProviderInstance)
 				provider.GET("/:provider_name/instances/:instance_name/models", r.providerHandler.ListInstanceModels)
 				provider.PATCH("/:provider_name/instances/:instance_name/models/:model_name", r.providerHandler.EnableOrDisableModel)
-				provider.POST("/:provider_name/instances/:instance_name/models/:model_name", r.providerHandler.ChatToModel)
+				provider.POST("/:provider_name/instances/:instance_name/models", r.providerHandler.ChatToModel)
 			}

 			model := v1.Group("/models")
--- a/internal/service/model_service.go
+++ b/internal/service/model_service.go
@@ -770,11 +770,14 @@ func (m *ModelProviderService) ChatToModel(providerName, instanceName, modelName
 			return nil, common.CodeNotFound, errors.New("provider not found")
 		}

-		_, err = dao.GetModelProviderManager().GetModelByName(providerName, modelName)
+		var model *entity.Model = nil
+		model, err = dao.GetModelProviderManager().GetModelByName(providerName, modelName)
 		if err != nil {
 			return nil, common.CodeNotFound, errors.New(fmt.Sprintf("provider %s model %s not found", providerName, modelName))
 		}

+		modelConfig.ModelSeries = model.Series
+
 		var extra map[string]string
 		err = json.Unmarshal([]byte(instance.Extra), &extra)
 		if err != nil {