From 658ff06ca42d242e0c418f2d13105ec88dc4ec89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=86=E8=90=8C=E9=97=B7=E6=B2=B9=E7=93=B6?=
 <253605712@qq.com>
Date: Fri, 29 May 2026 19:28:29 +0800
Subject: [PATCH] feat: add 4 new models for siliconflow (#15383)

### What problem does this PR solve?

Added 4 new SiliconFlow models:

- Pro/deepseek-ai/DeepSeek-V4-Pro
- Pro/deepseek-ai/DeepSeek-V4-Flash
- Pro/moonshotai/Kimi-K2.6
- Pro/zai-org/GLM-5.1

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 conf/llm_factories.json       | 28 ++++++++++++++++
 conf/models/siliconflow.json  | 45 +++++++++++++++++++++++++
 internal/entity/model_test.go | 62 +++++++++++++++++++++++++++++++++++
 rag/llm/chat_model.py         |  2 +-
 4 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index b8da976234..f9551ccf47 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -2927,6 +2927,20 @@
                     "model_type": "chat",
                     "is_tools": true
                 },
+                {
+                    "llm_name": "Pro/deepseek-ai/DeepSeek-V4-Pro",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Pro/deepseek-ai/DeepSeek-V4-Flash",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
                 {
                     "llm_name": "deepseek-ai/DeepSeek-V3.1",
                     "tags": "LLM,CHAT,160",
@@ -3116,6 +3130,13 @@
                     "model_type": "chat",
                     "is_tools": true
                 },
+                {
+                    "llm_name": "Pro/moonshotai/Kimi-K2.6",
+                    "tags": "LLM,CHAT,IMAGE2TEXT,262k",
+                    "max_tokens": 262000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
                 {
                     "llm_name": "Pro/zai-org/GLM-4.7",
                     "tags": "LLM,CHAT,205k",
@@ -3123,6 +3144,13 @@
                     "model_type": "chat",
                     "is_tools": true
                 },
+                {
+                    "llm_name": "Pro/zai-org/GLM-5.1",
+                    "tags": "LLM,CHAT,205k",
+                    "max_tokens": 205000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
                 {
                     "llm_name": "deepseek-ai/DeepSeek-V3.2",
                     "tags": "LLM,CHAT,164k",
diff --git a/conf/models/siliconflow.json b/conf/models/siliconflow.json
index 320d21aba5..06ec018a70 100644
--- a/conf/models/siliconflow.json
+++ b/conf/models/siliconflow.json
@@ -13,6 +13,51 @@
     "asr": "audio/transcriptions"
   },
   "models": [
+    {
+      "name": "Pro/deepseek-ai/DeepSeek-V4-Pro",
+      "max_tokens": 1048576,
+      "model_types": [
+        "chat"
+      ],
+      "thinking": {
+        "default_value": true,
+        "clear_thinking": true
+      }
+    },
+    {
+      "name": "Pro/deepseek-ai/DeepSeek-V4-Flash",
+      "max_tokens": 1048576,
+      "model_types": [
+        "chat"
+      ],
+      "thinking": {
+        "default_value": true,
+        "clear_thinking": true
+      }
+    },
+    {
+      "name": "Pro/moonshotai/Kimi-K2.6",
+      "max_tokens": 262144,
+      "model_types": [
+        "chat",
+        "vision"
+      ],
+      "thinking": {
+        "default_value": true,
+        "clear_thinking": true
+      }
+    },
+    {
+      "name": "Pro/zai-org/GLM-5.1",
+      "max_tokens": 204800,
+      "model_types": [
+        "chat"
+      ],
+      "thinking": {
+        "default_value": true,
+        "clear_thinking": true
+      }
+    },
     {
       "name": "qwen/qwen3-8b",
       "max_tokens": 32768,
diff --git a/internal/entity/model_test.go b/internal/entity/model_test.go
index 0076128dbc..591e9943df 100644
--- a/internal/entity/model_test.go
+++ b/internal/entity/model_test.go
@@ -209,3 +209,65 @@ func TestPPIOProviderConfigLoadsIntoProviderManager(t *testing.T) {
 		t.Errorf("SearchByType data count=%d, want 21", len(resp.Data))
 	}
 }
+
+func TestSiliconFlowProviderConfigLoadsLatestProModels(t *testing.T) { // Checks that siliconflow.json loads through NewProviderManager and exposes the newly added Pro models.
+	dir := t.TempDir() // scratch directory handed to NewProviderManager below
+	if err := os.WriteFile(filepath.Join(dir, "siliconflow.json"), readProviderConfig(t, "siliconflow.json"), 0o600); err != nil { // presumably copies the repo's provider config; readProviderConfig is defined elsewhere
+		t.Fatalf("write siliconflow config: %v", err)
+	}
+
+	pm, err := NewProviderManager(dir)
+	if err != nil {
+		t.Fatalf("NewProviderManager: %v", err)
+	}
+
+	provider := pm.FindProvider("SiliconFlow")
+	if provider == nil {
+		t.Fatal("SiliconFlow provider not found") // fatal: every check below dereferences provider
+	}
+	if provider.URL["default"] != "https://api.siliconflow.cn/v1" {
+		t.Errorf("default URL=%q", provider.URL["default"])
+	}
+	if provider.URLSuffix.Chat != "chat/completions" {
+		t.Errorf("chat suffix=%q", provider.URLSuffix.Chat)
+	}
+	if _, ok := provider.ModelDriver.(*modeldrivers.SiliconflowModel); !ok { // driver must be the SiliconFlow-specific implementation
+		t.Fatalf("ModelDriver=%T, want *models.SiliconflowModel", provider.ModelDriver)
+	}
+	if provider.ModelDriver.Name() != "siliconflow" {
+		t.Errorf("ModelDriver.Name()=%q", provider.ModelDriver.Name())
+	}
+	if len(provider.Models) != 12 { // exact total for the provider: update this whenever siliconflow.json gains or loses a model
+		t.Fatalf("SiliconFlow model count=%d, want 12", len(provider.Models))
+	}
+
+	deepSeekV4Pro, err := pm.GetModelByName("SiliconFlow", "Pro/deepseek-ai/DeepSeek-V4-Pro") // NOTE(review): the sibling Pro/deepseek-ai/DeepSeek-V4-Flash entry is not asserted in this test
+	if err != nil {
+		t.Fatalf("GetModelByName DeepSeek-V4-Pro: %v", err)
+	}
+	if deepSeekV4Pro.MaxTokens != 1048576 { // 1024 * 1024
+		t.Errorf("DeepSeek-V4-Pro max_tokens=%d", deepSeekV4Pro.MaxTokens)
+	}
+	if !deepSeekV4Pro.ModelTypeMap["chat"] {
+		t.Errorf("DeepSeek-V4-Pro model types=%v, want chat", deepSeekV4Pro.ModelTypes)
+	}
+
+	kimiK26, err := pm.GetModelByName("SiliconFlow", "Pro/moonshotai/Kimi-K2.6")
+	if err != nil {
+		t.Fatalf("GetModelByName Kimi-K2.6: %v", err)
+	}
+	if kimiK26.MaxTokens != 262144 { // 256 * 1024
+		t.Errorf("Kimi-K2.6 max_tokens=%d", kimiK26.MaxTokens)
+	}
+	if !kimiK26.ModelTypeMap["chat"] || !kimiK26.ModelTypeMap["vision"] { // both type flags must be set for this model
+		t.Errorf("Kimi-K2.6 model types=%v, want chat+vision", kimiK26.ModelTypes)
+	}
+
+	glm51, err := pm.GetModelByName("SiliconFlow", "Pro/zai-org/GLM-5.1")
+	if err != nil {
+		t.Fatalf("GetModelByName GLM-5.1: %v", err)
+	}
+	if glm51.MaxTokens != 204800 { // 200 * 1024
+		t.Errorf("GLM-5.1 max_tokens=%d", glm51.MaxTokens)
+	}
+}
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 82a9e48f9b..95d86c4b93 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -90,7 +90,7 @@ def _apply_model_family_policies(
         if provider == SupportedLiteLLMProvider.HunYuan:
             for key in ("presence_penalty", "frequency_penalty"):
                 sanitized_gen_conf.pop(key, None)
-        elif "kimi-k2.5" in model_name_lower:
+        elif "kimi-k2.5" in model_name_lower or "kimi-k2.6" in model_name_lower:
             reasoning = sanitized_gen_conf.pop("reasoning", None)
             thinking = {"type": "enabled"}
             if reasoning is not None: