From 0d7ad0ed0c9f9a8a02c462205d39f2312eb8aefd Mon Sep 17 00:00:00 2001
From: jiashi19 <107411592+jiashi19@users.noreply.github.com>
Date: Sun, 28 Jun 2026 12:02:55 +0800
Subject: [PATCH] Feat/agent thinking switch (#15446)

### What problem does this PR solve?

This PR adds an Agent LLM setting to control thinking mode for official providers that expose a thinking switch.

Related to #12842. Closes #15445.

Some providers expose thinking controls through provider-specific request fields, but Agent LLM settings did not have a unified option for users to enable or disable thinking mode. This PR adds a `Thinking` selector with:

- System default
- Enabled
- Disabled

8566b0b4-0546-4c8a-913d-f9bbd38319f6
8a0a6bee-f45f-48d5-bd83-17af260de3db

Initial support is limited to the verified official providers:

- Qwen / DashScope: `enable_thinking`
- Kimi / Moonshot: `thinking.type`
- GLM / ZHIPU-AI: `thinking.type`

For LiteLLM-based providers, provider-specific fields are forwarded through `extra_body` before `drop_params` filtering so the request parameters are preserved.

### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Co-authored-by: jiashi Co-authored-by: Zhichang Yu --- agent/component/llm.py | 3 + rag/llm/chat_model.py | 136 ++++++++++++--- test/unit_test/rag/llm/conftest.py | 14 ++ .../llm/test_chat_model_thinking_policy.py | 163 ++++++++++++++++++ .../rag/llm/test_clean_conf_whitelist.py | 26 ++- web/src/components/llm-setting-items/next.tsx | 38 ++++ web/src/locales/en.ts | 6 + web/src/locales/zh-traditional.ts | 5 + web/src/locales/zh.ts | 5 + 9 files changed, 367 insertions(+), 29 deletions(-) create mode 100644 test/unit_test/rag/llm/test_chat_model_thinking_policy.py diff --git a/agent/component/llm.py b/agent/component/llm.py index 36770c024b..ebfe8f09c5 100644 --- a/agent/component/llm.py +++ b/agent/component/llm.py @@ -49,6 +49,7 @@ class LLMParam(ComponentParamBase): self.output_structure = None self.cite = True self.visual_files_var = None + self.thinking = "" def check(self): self.check_decimal_float(float(self.temperature), "[Agent] Temperature") @@ -77,6 +78,8 @@ class LLMParam(ComponentParamBase): conf["presence_penalty"] = float(self.presence_penalty) if float(self.frequency_penalty) > 0 and get_attr("frequencyPenaltyEnabled"): conf["frequency_penalty"] = float(self.frequency_penalty) + if get_attr("thinking") in {"enabled", "disabled"}: + conf["thinking"] = get_attr("thinking") return conf diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index dd98e48ab5..169f37beda 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -95,10 +95,12 @@ ALLOWED_GEN_CONF_KEYS = frozenset( # LiteLLM additionally understands reasoning-control parameters that the # model-family policies may inject into `gen_conf` (e.g. `thinking` for -# Anthropic / Kimi reasoning models, `reasoning_effort` for OpenAI o-series). +# Anthropic / Kimi reasoning models, `enable_thinking` for Qwen models, +# `reasoning_effort` for OpenAI o-series). 
LITELLM_ALLOWED_GEN_CONF_KEYS = ALLOWED_GEN_CONF_KEYS | frozenset( { "thinking", + "enable_thinking", "reasoning_effort", "extra_body", } @@ -117,9 +119,43 @@ def _apply_model_family_policies( sanitized_gen_conf = deepcopy(gen_conf) if gen_conf else {} sanitized_kwargs = dict(request_kwargs) if request_kwargs else {} - # Qwen3 family disables thinking by extra_body on non-stream chat requests. + def _thinking_type(): + val = sanitized_gen_conf.get("thinking") + if isinstance(val, dict): + val = val.get("type") + + enable_thinking = sanitized_gen_conf.get("enable_thinking") + + if isinstance(val, str) and val in {"enabled", "disabled"}: + return val + if isinstance(enable_thinking, bool): + return "enabled" if enable_thinking else "disabled" + return None + + def _pop_thinking_controls(): + sanitized_gen_conf.pop("thinking", None) + sanitized_gen_conf.pop("enable_thinking", None) + + def _merge_extra_body(target: dict, extra: dict) -> None: + body = target.get("extra_body") + if not isinstance(body, dict): + body = {} + body.update(extra) + target["extra_body"] = body + + thinking_type = _thinking_type() + + # Qwen3 keeps RAGFlow's system default of disabling thinking unless explicitly overridden. 
if "qwen3" in model_name_lower: - sanitized_kwargs["extra_body"] = {"enable_thinking": False} + _pop_thinking_controls() + enable_thinking = thinking_type == "enabled" if thinking_type else False + if backend == "litellm" and provider in { + SupportedLiteLLMProvider.Tongyi_Qianwen, + SupportedLiteLLMProvider.Dashscope, + }: + sanitized_gen_conf["enable_thinking"] = enable_thinking + else: + _merge_extra_body(sanitized_kwargs, {"enable_thinking": enable_thinking}) if backend == "base": return sanitized_gen_conf, sanitized_kwargs @@ -137,27 +173,50 @@ def _apply_model_family_policies( if provider == SupportedLiteLLMProvider.HunYuan: for key in ("presence_penalty", "frequency_penalty"): sanitized_gen_conf.pop(key, None) - elif "kimi-k2.5" in model_name_lower or "kimi-k2.6" in model_name_lower: - reasoning = sanitized_gen_conf.pop("reasoning", None) - thinking = {"type": "enabled"} - if reasoning is not None: - thinking = {"type": "enabled"} if reasoning else {"type": "disabled"} - elif not isinstance(thinking, dict) or thinking.get("type") not in {"enabled", "disabled"}: - thinking = {"type": "disabled"} - sanitized_gen_conf["thinking"] = thinking + elif provider == SupportedLiteLLMProvider.Moonshot: + if thinking_type: + _pop_thinking_controls() + sanitized_gen_conf["thinking"] = {"type": thinking_type} - thinking_enabled = thinking.get("type") == "enabled" - sanitized_gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6 - sanitized_gen_conf["top_p"] = 0.95 - sanitized_gen_conf["n"] = 1 - sanitized_gen_conf["presence_penalty"] = 0.0 - sanitized_gen_conf["frequency_penalty"] = 0.0 + if thinking_type or "kimi-k2.5" in model_name_lower or "kimi-k2.6" in model_name_lower: + sanitized_gen_conf.pop("temperature", None) + sanitized_gen_conf["top_p"] = 0.95 + sanitized_gen_conf["n"] = 1 + sanitized_gen_conf["presence_penalty"] = 0.0 + sanitized_gen_conf["frequency_penalty"] = 0.0 + elif ( + provider == SupportedLiteLLMProvider.ZHIPU_AI + and "glm" in model_name_lower + 
and thinking_type + ): + _pop_thinking_controls() + sanitized_gen_conf["thinking"] = {"type": thinking_type} return sanitized_gen_conf, sanitized_kwargs return sanitized_gen_conf, sanitized_kwargs +def _move_litellm_provider_body_fields(provider: SupportedLiteLLMProvider | str | None, completion_args: dict) -> dict: + provider_body_fields = { + SupportedLiteLLMProvider.Tongyi_Qianwen: {"enable_thinking"}, + SupportedLiteLLMProvider.Dashscope: {"enable_thinking"}, + SupportedLiteLLMProvider.Moonshot: {"thinking"}, + SupportedLiteLLMProvider.ZHIPU_AI: {"thinking"}, + }.get(provider, set()) + + body = completion_args.get("extra_body") + if not isinstance(body, dict): + body = {} + moved = False + for key in provider_body_fields: + if key in completion_args: + body[key] = completion_args.pop(key) + moved = True + if moved or body: + completion_args["extra_body"] = body + return completion_args + class Base(ABC): def __init__(self, key, model_name, base_url, **kwargs): timeout = int(os.environ.get("LLM_TIMEOUT_SECONDS", 600)) @@ -197,12 +256,6 @@ class Base(ABC): return LLMErrorCode.ERROR_GENERIC def _clean_conf(self, gen_conf): - gen_conf, _ = _apply_model_family_policies( - self.model_name, - backend="base", - gen_conf=gen_conf, - ) - if "max_tokens" in gen_conf: del gen_conf["max_tokens"] @@ -213,10 +266,17 @@ class Base(ABC): logging.info("[HISTORY STREAMLY]" + json.dumps(history, ensure_ascii=False, indent=4)) reasoning_start = False + gen_conf, extra_request_kwargs = _apply_model_family_policies( + self.model_name, + backend="base", + gen_conf=gen_conf, + request_kwargs={}, + ) request_kwargs = {"model": self.model_name, "messages": history, "stream": True, **gen_conf} stop = kwargs.get("stop") if stop: request_kwargs["stop"] = stop + request_kwargs.update(extra_request_kwargs) response = await self.async_client.chat.completions.create(**request_kwargs) async for resp in response: @@ -407,6 +467,12 @@ class Base(ABC): async def async_chat_with_tools(self, system: 
str, history: list, gen_conf: dict | None = None): gen_conf = dict(gen_conf or {}) gen_conf = self._clean_conf(gen_conf) + gen_conf, extra_request_kwargs = _apply_model_family_policies( + self.model_name, + backend="base", + gen_conf=gen_conf, + request_kwargs={}, + ) if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) @@ -418,7 +484,7 @@ class Base(ABC): try: for _ in range(self.max_rounds + 1): logging.info(f"{self.tools=}") - response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf) + response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf, **extra_request_kwargs) tk_count += total_token_count_from_response(response) if not response.choices or not response.choices[0].message: raise Exception(f"500 response structure error. Response: {response}") @@ -473,6 +539,12 @@ class Base(ABC): async def async_chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict | None = None): gen_conf = dict(gen_conf or {}) gen_conf = self._clean_conf(gen_conf) + gen_conf, extra_request_kwargs = _apply_model_family_policies( + self.model_name, + backend="base", + gen_conf=gen_conf, + request_kwargs={}, + ) tools = self.tools if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) @@ -487,7 +559,7 @@ class Base(ABC): reasoning_start = False logging.info(f"[ToolLoop] round={_round} model={self.model_name} tools={[t['function']['name'] for t in tools]}") - response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf) + response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", 
**gen_conf, **extra_request_kwargs) final_tool_calls = {} answer = "" @@ -573,7 +645,15 @@ class Base(ABC): logging.warning(f"Exceed max rounds: {self.max_rounds}") history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"}) - response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf) + response = await self.async_client.chat.completions.create( + model=self.model_name, + messages=history, + stream=True, + tools=tools, + tool_choice="auto", + **gen_conf, + **extra_request_kwargs, + ) async for resp in response: if not hasattr(resp, "choices") or not resp.choices: @@ -619,9 +699,10 @@ class Base(ABC): return final_ans.strip(), tol_token - _, kwargs = _apply_model_family_policies( + gen_conf, kwargs = _apply_model_family_policies( self.model_name, backend="base", + gen_conf=gen_conf, request_kwargs=kwargs, ) @@ -2080,6 +2161,7 @@ class LiteLLMBase(ABC): api_base = completion_args.get("api_base", self.base_url) separator = "&" if "?" in api_base else "?" completion_args["api_base"] = f"{api_base}{separator}GroupId={self.group_id}" + _move_litellm_provider_body_fields(self.provider, completion_args) if extra_headers: completion_args["extra_headers"] = extra_headers return completion_args diff --git a/test/unit_test/rag/llm/conftest.py b/test/unit_test/rag/llm/conftest.py index 3d9bf31caa..60ed0a9965 100644 --- a/test/unit_test/rag/llm/conftest.py +++ b/test/unit_test/rag/llm/conftest.py @@ -27,6 +27,7 @@ works without triggering the full init. 
import os import sys import types +from enum import StrEnum # Resolve the real path to rag/llm/ so sub-module imports can find files _RAGFLOW_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) @@ -55,6 +56,19 @@ def _install_rag_llm_stub(): llm_pkg.Seq2txtModel = {} llm_pkg.TTSModel = {} llm_pkg.OcrModel = {} + + class SupportedLiteLLMProvider(StrEnum): + Tongyi_Qianwen = "Tongyi-Qianwen" + Dashscope = "Dashscope" + Moonshot = "Moonshot" + ZHIPU_AI = "ZHIPU-AI" + OpenAI = "OpenAI" + Azure_OpenAI = "Azure-OpenAI" + HunYuan = "Tencent Hunyuan" + + llm_pkg.SupportedLiteLLMProvider = SupportedLiteLLMProvider + llm_pkg.FACTORY_DEFAULT_BASE_URL = {} + llm_pkg.LITELLM_PROVIDER_PREFIX = {} sys.modules["rag.llm"] = llm_pkg diff --git a/test/unit_test/rag/llm/test_chat_model_thinking_policy.py b/test/unit_test/rag/llm/test_chat_model_thinking_policy.py new file mode 100644 index 0000000000..cc9ce65351 --- /dev/null +++ b/test/unit_test/rag/llm/test_chat_model_thinking_policy.py @@ -0,0 +1,163 @@ +# +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pytest + +from rag.llm import SupportedLiteLLMProvider +from rag.llm.chat_model import _apply_model_family_policies, _move_litellm_provider_body_fields + +pytestmark = pytest.mark.p1 + + +def test_qwen3_uses_system_disabled_default(): + gen_conf, kwargs = _apply_model_family_policies( + "qwen3-plus", + backend="base", + gen_conf={}, + request_kwargs={}, + ) + + assert gen_conf == {} + assert kwargs["extra_body"]["enable_thinking"] is False + + +def test_qwen3_can_enable_thinking_explicitly(): + gen_conf, kwargs = _apply_model_family_policies( + "qwen3-plus", + backend="base", + gen_conf={"thinking": "enabled", "temperature": 0.2}, + request_kwargs={"extra_body": {"seed": 1}}, + ) + + assert gen_conf == {"temperature": 0.2} + assert kwargs["extra_body"] == {"seed": 1, "enable_thinking": True} + + +@pytest.mark.parametrize( + "provider", + [SupportedLiteLLMProvider.Tongyi_Qianwen, SupportedLiteLLMProvider.Dashscope], +) +def test_qwen3_litellm_provider_uses_provider_field(provider): + gen_conf, kwargs = _apply_model_family_policies( + "qwen3-max", + backend="litellm", + provider=provider, + gen_conf={"thinking": "disabled"}, + request_kwargs={}, + ) + + assert kwargs == {} + assert gen_conf["enable_thinking"] is False + + +def test_kimi_thinking_maps_to_moonshot_payload(): + gen_conf, kwargs = _apply_model_family_policies( + "kimi-k2.6-preview", + backend="litellm", + provider=SupportedLiteLLMProvider.Moonshot, + gen_conf={"thinking": "disabled", "temperature": 0.6}, + request_kwargs={}, + ) + + assert kwargs == {} + assert gen_conf["thinking"] == {"type": "disabled"} + assert "temperature" not in gen_conf + + +def test_moonshot_explicit_thinking_does_not_require_exact_kimi_model_name(): + gen_conf, kwargs = _apply_model_family_policies( + "kimi-latest", + backend="litellm", + provider=SupportedLiteLLMProvider.Moonshot, + gen_conf={"thinking": "disabled"}, + request_kwargs={}, + ) + + assert kwargs == {} + assert gen_conf["thinking"] == {"type": 
"disabled"} + + +def test_kimi_keeps_provider_default_when_unspecified(): + gen_conf, kwargs = _apply_model_family_policies( + "kimi-k2.5-preview", + backend="litellm", + provider=SupportedLiteLLMProvider.Moonshot, + gen_conf={"temperature": 0.6}, + request_kwargs={}, + ) + + assert kwargs == {} + assert "thinking" not in gen_conf + assert "temperature" not in gen_conf + assert gen_conf["top_p"] == 0.95 + assert gen_conf["n"] == 1 + assert gen_conf["presence_penalty"] == 0.0 + assert gen_conf["frequency_penalty"] == 0.0 + + +def test_glm_keeps_provider_default_when_unspecified(): + gen_conf, kwargs = _apply_model_family_policies( + "glm-4.7", + backend="litellm", + provider=SupportedLiteLLMProvider.ZHIPU_AI, + gen_conf={}, + request_kwargs={}, + ) + + assert kwargs == {} + assert gen_conf == {} + + +def test_glm_thinking_maps_to_zhipu_payload(): + gen_conf, kwargs = _apply_model_family_policies( + "glm-4.7", + backend="litellm", + provider=SupportedLiteLLMProvider.ZHIPU_AI, + gen_conf={"thinking": "enabled"}, + request_kwargs={}, + ) + + assert kwargs == {} + assert gen_conf["thinking"] == {"type": "enabled"} + + +def test_litellm_provider_body_fields_move_to_extra_body_before_drop_params(): + completion_args = { + "model": "kimi-latest", + "messages": [], + "thinking": {"type": "disabled"}, + "temperature": 0.2, + } + + _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Moonshot, completion_args) + + assert completion_args["extra_body"]["thinking"] == {"type": "disabled"} + assert "thinking" not in completion_args + assert completion_args["temperature"] == 0.2 + + +def test_litellm_provider_body_fields_preserve_existing_extra_body(): + completion_args = { + "model": "qwen3-max", + "messages": [], + "enable_thinking": False, + "extra_body": {"seed": 1}, + } + + _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Tongyi_Qianwen, completion_args) + + assert completion_args["extra_body"] == {"seed": 1, "enable_thinking": False} + assert 
"enable_thinking" not in completion_args diff --git a/test/unit_test/rag/llm/test_clean_conf_whitelist.py b/test/unit_test/rag/llm/test_clean_conf_whitelist.py index 019a27be1a..1ff9abd209 100644 --- a/test/unit_test/rag/llm/test_clean_conf_whitelist.py +++ b/test/unit_test/rag/llm/test_clean_conf_whitelist.py @@ -31,8 +31,15 @@ These tests pin the whitelisting behaviour for both backends so the leak cannot reappear. """ +import sys +from unittest.mock import MagicMock + import pytest +if isinstance(sys.modules.get("rag.llm.chat_model"), MagicMock): + del sys.modules["rag.llm.chat_model"] + +from rag.llm import SupportedLiteLLMProvider from rag.llm.chat_model import ( ALLOWED_GEN_CONF_KEYS, LITELLM_ALLOWED_GEN_CONF_KEYS, @@ -78,6 +85,13 @@ def test_base_drops_model_type(): assert cleaned["temperature"] == 0.5 +def test_base_drops_litellm_reasoning_controls(): + cleaned = _make_base()._clean_conf({"temperature": 0.5, "thinking": {"type": "enabled"}, "enable_thinking": True}) + assert "thinking" not in cleaned + assert "enable_thinking" not in cleaned + assert cleaned["temperature"] == 0.5 + + @pytest.mark.parametrize("stray_key", ["model_type", "llm_id", "parameter", "icon", "foo"]) def test_litellm_drops_arbitrary_internal_keys(stray_key): cleaned = _make_litellm()._clean_conf({stray_key: "x", "top_p": 0.9}) @@ -100,12 +114,20 @@ def test_litellm_preserves_known_generation_params(): def test_litellm_preserves_thinking_param(): - """``thinking`` is injected by the model-family policy for reasoning - models and must survive the whitelist (it is a valid LiteLLM param).""" + """``thinking`` is a valid LiteLLM parameter even without a provider policy.""" cleaned = _make_litellm()._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0}) assert cleaned["thinking"] == {"type": "enabled"} +def test_litellm_preserves_provider_mapped_thinking_param(): + """Provider-mapped ``thinking`` must survive the LiteLLM whitelist.""" + cleaned = _make_litellm( + 
"kimi-k2.6-preview", + SupportedLiteLLMProvider.Moonshot, + )._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0}) + assert cleaned["thinking"] == {"type": "enabled"} + + def test_max_tokens_is_dropped_on_both_backends(): assert "max_tokens" not in _make_litellm()._clean_conf({"max_tokens": 100, "temperature": 0.3}) assert "max_tokens" not in _make_base()._clean_conf({"max_tokens": 100, "temperature": 0.3}) diff --git a/web/src/components/llm-setting-items/next.tsx b/web/src/components/llm-setting-items/next.tsx index 66344a4cf8..8c93c30319 100644 --- a/web/src/components/llm-setting-items/next.tsx +++ b/web/src/components/llm-setting-items/next.tsx @@ -38,6 +38,7 @@ interface LlmSettingFieldItemsProps { | 'presence_penalty' | 'frequency_penalty' | 'max_tokens' + | 'thinking' >; showCollapse?: boolean; } @@ -61,6 +62,7 @@ export const LlmSettingFieldSchema = { frequency_penalty: z.coerce.number().optional(), max_tokens: z.number().optional(), parameter: z.string().optional(), + thinking: z.enum(['default', 'enabled', 'disabled']).optional(), }; export const LlmSettingSchema = { @@ -80,6 +82,7 @@ export function LlmSettingFieldItems({ 'presence_penalty', 'frequency_penalty', 'max_tokens', + 'thinking', ], llmId, showCollapse = false, @@ -249,6 +252,41 @@ export function LlmSettingFieldItems({ }} > )} + {showFields.some((item) => item === 'thinking') && ( + ( + + + {t('thinking')} + + + + + + + )} + /> + )} diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 01517b9ed8..19b5a6ad14 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1056,6 +1056,12 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s maxTokensTip: `The maximum context size of the model; an invalid or incorrect value will cause an error. 
Defaults to 512.`, maxTokensInvalidMessage: 'Please enter a valid number for Max tokens.', maxTokensMinMessage: 'Max tokens cannot be less than 0.', + thinking: 'Thinking', + thinkingDefault: 'System default', + thinkingEnabled: 'Enabled', + thinkingDisabled: 'Disabled', + thinkingTip: + 'Only controls thinking mode for official Qwen, Kimi, and GLM model providers. System default disables Qwen thinking to avoid long-running tasks.', quote: 'Show quote', quoteTip: 'Whether to display the original text as a reference.', selfRag: 'Self-RAG', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 20d2907003..6f9e29bf52 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -506,6 +506,11 @@ export default { maxTokensTip: `模型的最大上下文大小;無效或不正確的值會導致錯誤。預設為 512。`, maxTokensInvalidMessage: '請輸入有效的最大標記數。', maxTokensMinMessage: '最大標記數不能小於 0。', + thinking: '思考', + thinkingDefault: '系統預設', + thinkingEnabled: '開啟', + thinkingDisabled: '關閉', + thinkingTip: '僅控制官方模型提供商中的 Qwen、Kimi 和 GLM 模型思考模式。系統預設會關閉 Qwen 思考,以避免任務長時間執行。', quote: '顯示引文', quoteTip: '是否應該顯示原文出處?', selfRag: 'Self-RAG', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index ce1d0c868c..ddedd69359 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -957,6 +957,11 @@ NER:使用 spaCy NER 和基于规则的关键词提取来抽取实体和关系 maxTokensTip: `模型的最大上下文大小;无效或不正确的值会导致错误。默认值为 512。`, maxTokensInvalidMessage: '请输入有效的最大令牌数。', maxTokensMinMessage: '最大令牌数不能小于 0。', + thinking: '思考', + thinkingDefault: '系统默认', + thinkingEnabled: '开启', + thinkingDisabled: '关闭', + thinkingTip: '仅控制官方模型提供商中的 Qwen、Kimi 和 GLM 模型思考模式。系统默认会关闭 Qwen 思考,以避免任务长时间运行。', quote: '显示引文', quoteTip: '是否应该显示原文出处?', selfRag: 'Self-RAG',