From 0d7ad0ed0c9f9a8a02c462205d39f2312eb8aefd Mon Sep 17 00:00:00 2001
From: jiashi19 <107411592+jiashi19@users.noreply.github.com>
Date: Sun, 28 Jun 2026 12:02:55 +0800
Subject: [PATCH] Feat/agent thinking switch (#15446)

### What problem does this PR solve?

This PR adds an Agent LLM setting to control thinking mode for official
providers that expose a thinking switch.

Related to #12842.
Closes #15445.

Some providers expose thinking controls through provider-specific
request fields, but Agent LLM settings did not have a unified option for
users to enable or disable thinking mode.

This PR adds a `Thinking` selector with:

- System default
- Enabled
- Disabled

<img width="452" height="278" alt="8566b0b4-0546-4c8a-913d-f9bbd38319f6"
src="https://github.com/user-attachments/assets/25b497f7-1ba0-4bfe-940d-6fe79287d6ab"
/>
<img width="471" height="971" alt="8a0a6bee-f45f-48d5-bd83-17af260de3db"
src="https://github.com/user-attachments/assets/41ad43c1-5087-48f1-bf37-f2ca14c2be2f"
/>

Initial support is limited to the verified official providers:

- Qwen / DashScope: `enable_thinking`
- Kimi / Moonshot: `thinking.type`
- GLM / ZHIPU-AI: `thinking.type`

For LiteLLM-based providers, these provider-specific fields are moved
into `extra_body` before `drop_params` filtering runs, so they are not
stripped from the outgoing request.



### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: jiashi <jiashi19@outlook.com>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
---
 agent/component/llm.py                        |   3 +
 rag/llm/chat_model.py                         | 136 ++++++++++++---
 test/unit_test/rag/llm/conftest.py            |  14 ++
 .../llm/test_chat_model_thinking_policy.py    | 163 ++++++++++++++++++
 .../rag/llm/test_clean_conf_whitelist.py      |  26 ++-
 web/src/components/llm-setting-items/next.tsx |  38 ++++
 web/src/locales/en.ts                         |   6 +
 web/src/locales/zh-traditional.ts             |   5 +
 web/src/locales/zh.ts                         |   5 +
 9 files changed, 367 insertions(+), 29 deletions(-)
 create mode 100644 test/unit_test/rag/llm/test_chat_model_thinking_policy.py
diff --git a/agent/component/llm.py b/agent/component/llm.py
index 36770c024b..ebfe8f09c5 100644
--- a/agent/component/llm.py
+++ b/agent/component/llm.py
@@ -49,6 +49,7 @@ class LLMParam(ComponentParamBase):
         self.output_structure = None
         self.cite = True
         self.visual_files_var = None
+        self.thinking = ""
 
     def check(self):
         self.check_decimal_float(float(self.temperature), "[Agent] Temperature")
@@ -77,6 +78,8 @@ class LLMParam(ComponentParamBase):
             conf["presence_penalty"] = float(self.presence_penalty)
         if float(self.frequency_penalty) > 0 and get_attr("frequencyPenaltyEnabled"):
             conf["frequency_penalty"] = float(self.frequency_penalty)
+        if get_attr("thinking") in {"enabled", "disabled"}:
+            conf["thinking"] = get_attr("thinking")
         return conf
 
 
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index dd98e48ab5..169f37beda 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -95,10 +95,12 @@ ALLOWED_GEN_CONF_KEYS = frozenset(
 
 # LiteLLM additionally understands reasoning-control parameters that the
 # model-family policies may inject into `gen_conf` (e.g. `thinking` for
-# Anthropic / Kimi reasoning models, `reasoning_effort` for OpenAI o-series).
+# Anthropic / Kimi reasoning models, `enable_thinking` for Qwen models,
+# `reasoning_effort` for OpenAI o-series).
 LITELLM_ALLOWED_GEN_CONF_KEYS = ALLOWED_GEN_CONF_KEYS | frozenset(
     {
         "thinking",
+        "enable_thinking",
         "reasoning_effort",
         "extra_body",
     }
@@ -117,9 +119,43 @@ def _apply_model_family_policies(
     sanitized_gen_conf = deepcopy(gen_conf) if gen_conf else {}
     sanitized_kwargs = dict(request_kwargs) if request_kwargs else {}
 
-    # Qwen3 family disables thinking by extra_body on non-stream chat requests.
+    def _thinking_type():
+        """Return "enabled"/"disabled" for the thinking mode requested in gen_conf, or None if unset."""
+        val = sanitized_gen_conf.get("thinking")
+        if isinstance(val, dict):
+            val = val.get("type")
+        enable_thinking = sanitized_gen_conf.get("enable_thinking")
+        # A valid `thinking` value (plain string or {"type": ...}) wins over the boolean `enable_thinking`.
+        if isinstance(val, str) and val in {"enabled", "disabled"}:
+            return val
+        if isinstance(enable_thinking, bool):
+            return "enabled" if enable_thinking else "disabled"
+        return None
+
+    def _pop_thinking_controls():
+        sanitized_gen_conf.pop("thinking", None)
+        sanitized_gen_conf.pop("enable_thinking", None)
+
+    def _merge_extra_body(target: dict, extra: dict) -> None:
+        """Set target["extra_body"] to a new dict: the existing dict body (if any) overlaid with `extra`."""
+        current = target.get("extra_body")
+        # Build a fresh dict instead of updating in place: `sanitized_kwargs` is only a shallow
+        # copy of `request_kwargs`, so a nested `extra_body` dict is still the caller's object.
+        target["extra_body"] = {**current, **extra} if isinstance(current, dict) else dict(extra)
+
+    thinking_type = _thinking_type()
+
+    # Qwen3 keeps RAGFlow's system default of disabling thinking unless explicitly overridden.
     if "qwen3" in model_name_lower:
-        sanitized_kwargs["extra_body"] = {"enable_thinking": False}
+        _pop_thinking_controls()
+        enable_thinking = thinking_type == "enabled" if thinking_type else False
+        if backend == "litellm" and provider in {
+            SupportedLiteLLMProvider.Tongyi_Qianwen,
+            SupportedLiteLLMProvider.Dashscope,
+        }:
+            sanitized_gen_conf["enable_thinking"] = enable_thinking
+        else:
+            _merge_extra_body(sanitized_kwargs, {"enable_thinking": enable_thinking})
 
     if backend == "base":
         return sanitized_gen_conf, sanitized_kwargs
@@ -137,27 +173,50 @@ def _apply_model_family_policies(
         if provider == SupportedLiteLLMProvider.HunYuan:
             for key in ("presence_penalty", "frequency_penalty"):
                 sanitized_gen_conf.pop(key, None)
-        elif "kimi-k2.5" in model_name_lower or "kimi-k2.6" in model_name_lower:
-            reasoning = sanitized_gen_conf.pop("reasoning", None)
-            thinking = {"type": "enabled"}
-            if reasoning is not None:
-                thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
-            elif not isinstance(thinking, dict) or thinking.get("type") not in {"enabled", "disabled"}:
-                thinking = {"type": "disabled"}
-            sanitized_gen_conf["thinking"] = thinking
+        elif provider == SupportedLiteLLMProvider.Moonshot:
+            if thinking_type:
+                _pop_thinking_controls()
+                sanitized_gen_conf["thinking"] = {"type": thinking_type}
 
-            thinking_enabled = thinking.get("type") == "enabled"
-            sanitized_gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
-            sanitized_gen_conf["top_p"] = 0.95
-            sanitized_gen_conf["n"] = 1
-            sanitized_gen_conf["presence_penalty"] = 0.0
-            sanitized_gen_conf["frequency_penalty"] = 0.0
+            if thinking_type or "kimi-k2.5" in model_name_lower or "kimi-k2.6" in model_name_lower:
+                sanitized_gen_conf.pop("temperature", None)
+                sanitized_gen_conf["top_p"] = 0.95
+                sanitized_gen_conf["n"] = 1
+                sanitized_gen_conf["presence_penalty"] = 0.0
+                sanitized_gen_conf["frequency_penalty"] = 0.0
+        elif (
+            provider == SupportedLiteLLMProvider.ZHIPU_AI
+            and "glm" in model_name_lower
+            and thinking_type
+        ):
+            _pop_thinking_controls()
+            sanitized_gen_conf["thinking"] = {"type": thinking_type}
 
         return sanitized_gen_conf, sanitized_kwargs
 
     return sanitized_gen_conf, sanitized_kwargs
 
 
+def _move_litellm_provider_body_fields(provider: SupportedLiteLLMProvider | str | None, completion_args: dict) -> dict:
+    """Relocate provider-specific thinking fields from top-level `completion_args` into `extra_body`.
+
+    Mutates and returns `completion_args`. Only the providers listed below are affected. An
+    existing `extra_body` dict is copied before it is extended, so a dict that the caller may
+    share with other configuration is never modified in place; a non-dict `extra_body` is
+    replaced when a field is moved and left untouched otherwise.
+    """
+    provider_body_fields = {
+        SupportedLiteLLMProvider.Tongyi_Qianwen: {"enable_thinking"},
+        SupportedLiteLLMProvider.Dashscope: {"enable_thinking"},
+        SupportedLiteLLMProvider.Moonshot: {"thinking"},
+        SupportedLiteLLMProvider.ZHIPU_AI: {"thinking"},
+    }.get(provider, set())
+    moved = {key: completion_args.pop(key) for key in provider_body_fields if key in completion_args}
+    if moved:
+        body = completion_args.get("extra_body")
+        completion_args["extra_body"] = {**body, **moved} if isinstance(body, dict) else moved
+    return completion_args
+
 class Base(ABC):
     def __init__(self, key, model_name, base_url, **kwargs):
         timeout = int(os.environ.get("LLM_TIMEOUT_SECONDS", 600))
@@ -197,12 +256,6 @@ class Base(ABC):
         return LLMErrorCode.ERROR_GENERIC
 
     def _clean_conf(self, gen_conf):
-        gen_conf, _ = _apply_model_family_policies(
-            self.model_name,
-            backend="base",
-            gen_conf=gen_conf,
-        )
-
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
 
@@ -213,10 +266,17 @@ class Base(ABC):
         logging.info("[HISTORY STREAMLY]" + json.dumps(history, ensure_ascii=False, indent=4))
         reasoning_start = False
 
+        gen_conf, extra_request_kwargs = _apply_model_family_policies(
+            self.model_name,
+            backend="base",
+            gen_conf=gen_conf,
+            request_kwargs={},
+        )
         request_kwargs = {"model": self.model_name, "messages": history, "stream": True, **gen_conf}
         stop = kwargs.get("stop")
         if stop:
             request_kwargs["stop"] = stop
+        request_kwargs.update(extra_request_kwargs)
 
         response = await self.async_client.chat.completions.create(**request_kwargs)
         async for resp in response:
@@ -407,6 +467,12 @@ class Base(ABC):
     async def async_chat_with_tools(self, system: str, history: list, gen_conf: dict | None = None):
         gen_conf = dict(gen_conf or {})
         gen_conf = self._clean_conf(gen_conf)
+        gen_conf, extra_request_kwargs = _apply_model_family_policies(
+            self.model_name,
+            backend="base",
+            gen_conf=gen_conf,
+            request_kwargs={},
+        )
         if system and history and history[0].get("role") != "system":
             history.insert(0, {"role": "system", "content": system})
 
@@ -418,7 +484,7 @@ class Base(ABC):
             try:
                 for _ in range(self.max_rounds + 1):
                     logging.info(f"{self.tools=}")
-                    response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
+                    response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf, **extra_request_kwargs)
                     tk_count += total_token_count_from_response(response)
                     if not response.choices or not response.choices[0].message:
                         raise Exception(f"500 response structure error. Response: {response}")
@@ -473,6 +539,12 @@ class Base(ABC):
     async def async_chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict | None = None):
         gen_conf = dict(gen_conf or {})
         gen_conf = self._clean_conf(gen_conf)
+        gen_conf, extra_request_kwargs = _apply_model_family_policies(
+            self.model_name,
+            backend="base",
+            gen_conf=gen_conf,
+            request_kwargs={},
+        )
         tools = self.tools
         if system and history and history[0].get("role") != "system":
             history.insert(0, {"role": "system", "content": system})
@@ -487,7 +559,7 @@ class Base(ABC):
                     reasoning_start = False
                     logging.info(f"[ToolLoop] round={_round} model={self.model_name} tools={[t['function']['name'] for t in tools]}")
 
-                    response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
+                    response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf, **extra_request_kwargs)
 
                     final_tool_calls = {}
                     answer = ""
@@ -573,7 +645,15 @@ class Base(ABC):
                 logging.warning(f"Exceed max rounds: {self.max_rounds}")
                 history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
 
-                response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
+                response = await self.async_client.chat.completions.create(
+                    model=self.model_name,
+                    messages=history,
+                    stream=True,
+                    tools=tools,
+                    tool_choice="auto",
+                    **gen_conf,
+                    **extra_request_kwargs,
+                )
 
                 async for resp in response:
                     if not hasattr(resp, "choices") or not resp.choices:
@@ -619,9 +699,10 @@ class Base(ABC):
 
             return final_ans.strip(), tol_token
 
-        _, kwargs = _apply_model_family_policies(
+        gen_conf, kwargs = _apply_model_family_policies(
             self.model_name,
             backend="base",
+            gen_conf=gen_conf,
             request_kwargs=kwargs,
         )
 
@@ -2080,6 +2161,7 @@ class LiteLLMBase(ABC):
             api_base = completion_args.get("api_base", self.base_url)
             separator = "&" if "?" in api_base else "?"
             completion_args["api_base"] = f"{api_base}{separator}GroupId={self.group_id}"
+        _move_litellm_provider_body_fields(self.provider, completion_args)
         if extra_headers:
             completion_args["extra_headers"] = extra_headers
         return completion_args
diff --git a/test/unit_test/rag/llm/conftest.py b/test/unit_test/rag/llm/conftest.py
index 3d9bf31caa..60ed0a9965 100644
--- a/test/unit_test/rag/llm/conftest.py
+++ b/test/unit_test/rag/llm/conftest.py
@@ -27,6 +27,7 @@ works without triggering the full init.
 import os
 import sys
 import types
+from enum import StrEnum
 
 # Resolve the real path to rag/llm/ so sub-module imports can find files
 _RAGFLOW_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
@@ -55,6 +56,19 @@ def _install_rag_llm_stub():
     llm_pkg.Seq2txtModel = {}
     llm_pkg.TTSModel = {}
     llm_pkg.OcrModel = {}
+
+    class SupportedLiteLLMProvider(StrEnum):
+        Tongyi_Qianwen = "Tongyi-Qianwen"
+        Dashscope = "Dashscope"
+        Moonshot = "Moonshot"
+        ZHIPU_AI = "ZHIPU-AI"
+        OpenAI = "OpenAI"
+        Azure_OpenAI = "Azure-OpenAI"
+        HunYuan = "Tencent Hunyuan"
+
+    llm_pkg.SupportedLiteLLMProvider = SupportedLiteLLMProvider
+    llm_pkg.FACTORY_DEFAULT_BASE_URL = {}
+    llm_pkg.LITELLM_PROVIDER_PREFIX = {}
     sys.modules["rag.llm"] = llm_pkg
 
 
diff --git a/test/unit_test/rag/llm/test_chat_model_thinking_policy.py b/test/unit_test/rag/llm/test_chat_model_thinking_policy.py
new file mode 100644
index 0000000000..cc9ce65351
--- /dev/null
+++ b/test/unit_test/rag/llm/test_chat_model_thinking_policy.py
@@ -0,0 +1,163 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import pytest
+
+from rag.llm import SupportedLiteLLMProvider
+from rag.llm.chat_model import _apply_model_family_policies, _move_litellm_provider_body_fields
+
+pytestmark = pytest.mark.p1
+
+
+def test_qwen3_uses_system_disabled_default():
+    gen_conf, kwargs = _apply_model_family_policies(
+        "qwen3-plus",
+        backend="base",
+        gen_conf={},
+        request_kwargs={},
+    )
+
+    assert gen_conf == {}
+    assert kwargs["extra_body"]["enable_thinking"] is False
+
+
+def test_qwen3_can_enable_thinking_explicitly():
+    gen_conf, kwargs = _apply_model_family_policies(
+        "qwen3-plus",
+        backend="base",
+        gen_conf={"thinking": "enabled", "temperature": 0.2},
+        request_kwargs={"extra_body": {"seed": 1}},
+    )
+
+    assert gen_conf == {"temperature": 0.2}
+    assert kwargs["extra_body"] == {"seed": 1, "enable_thinking": True}
+
+
+@pytest.mark.parametrize(
+    "provider",
+    [SupportedLiteLLMProvider.Tongyi_Qianwen, SupportedLiteLLMProvider.Dashscope],
+)
+def test_qwen3_litellm_provider_uses_provider_field(provider):
+    gen_conf, kwargs = _apply_model_family_policies(
+        "qwen3-max",
+        backend="litellm",
+        provider=provider,
+        gen_conf={"thinking": "disabled"},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}
+    assert gen_conf["enable_thinking"] is False
+
+
+def test_kimi_thinking_maps_to_moonshot_payload():
+    gen_conf, kwargs = _apply_model_family_policies(
+        "kimi-k2.6-preview",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.Moonshot,
+        gen_conf={"thinking": "disabled", "temperature": 0.6},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}
+    assert gen_conf["thinking"] == {"type": "disabled"}
+    assert "temperature" not in gen_conf
+
+
+def test_moonshot_explicit_thinking_does_not_require_exact_kimi_model_name():
+    gen_conf, kwargs = _apply_model_family_policies(
+        "kimi-latest",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.Moonshot,
+        gen_conf={"thinking": "disabled"},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}
+    assert gen_conf["thinking"] == {"type": "disabled"}
+
+
+def test_kimi_keeps_provider_default_when_unspecified():
+    gen_conf, kwargs = _apply_model_family_policies(
+        "kimi-k2.5-preview",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.Moonshot,
+        gen_conf={"temperature": 0.6},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}
+    assert "thinking" not in gen_conf
+    assert "temperature" not in gen_conf
+    assert gen_conf["top_p"] == 0.95
+    assert gen_conf["n"] == 1
+    assert gen_conf["presence_penalty"] == 0.0
+    assert gen_conf["frequency_penalty"] == 0.0
+
+
+def test_glm_keeps_provider_default_when_unspecified():
+    gen_conf, kwargs = _apply_model_family_policies(
+        "glm-4.7",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.ZHIPU_AI,
+        gen_conf={},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}
+    assert gen_conf == {}
+
+
+def test_glm_thinking_maps_to_zhipu_payload():
+    gen_conf, kwargs = _apply_model_family_policies(
+        "glm-4.7",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.ZHIPU_AI,
+        gen_conf={"thinking": "enabled"},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}
+    assert gen_conf["thinking"] == {"type": "enabled"}
+
+
+def test_litellm_provider_body_fields_move_to_extra_body_before_drop_params():
+    completion_args = {
+        "model": "kimi-latest",
+        "messages": [],
+        "thinking": {"type": "disabled"},
+        "temperature": 0.2,
+    }
+
+    _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Moonshot, completion_args)
+
+    assert completion_args["extra_body"]["thinking"] == {"type": "disabled"}
+    assert "thinking" not in completion_args
+    assert completion_args["temperature"] == 0.2
+
+
+def test_litellm_provider_body_fields_preserve_existing_extra_body():
+    completion_args = {
+        "model": "qwen3-max",
+        "messages": [],
+        "enable_thinking": False,
+        "extra_body": {"seed": 1},
+    }
+
+    _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Tongyi_Qianwen, completion_args)
+
+    assert completion_args["extra_body"] == {"seed": 1, "enable_thinking": False}
+    assert "enable_thinking" not in completion_args
diff --git a/test/unit_test/rag/llm/test_clean_conf_whitelist.py b/test/unit_test/rag/llm/test_clean_conf_whitelist.py
index 019a27be1a..1ff9abd209 100644
--- a/test/unit_test/rag/llm/test_clean_conf_whitelist.py
+++ b/test/unit_test/rag/llm/test_clean_conf_whitelist.py
@@ -31,8 +31,15 @@ These tests pin the whitelisting behaviour for both backends so the leak
 cannot reappear.
 """
 
+import sys
+from unittest.mock import MagicMock
+
 import pytest
 
+if isinstance(sys.modules.get("rag.llm.chat_model"), MagicMock):
+    del sys.modules["rag.llm.chat_model"]
+
+from rag.llm import SupportedLiteLLMProvider
 from rag.llm.chat_model import (
     ALLOWED_GEN_CONF_KEYS,
     LITELLM_ALLOWED_GEN_CONF_KEYS,
@@ -78,6 +85,13 @@ def test_base_drops_model_type():
     assert cleaned["temperature"] == 0.5
 
 
+def test_base_drops_litellm_reasoning_controls():
+    cleaned = _make_base()._clean_conf({"temperature": 0.5, "thinking": {"type": "enabled"}, "enable_thinking": True})
+    assert "thinking" not in cleaned
+    assert "enable_thinking" not in cleaned
+    assert cleaned["temperature"] == 0.5
+
+
 @pytest.mark.parametrize("stray_key", ["model_type", "llm_id", "parameter", "icon", "foo"])
 def test_litellm_drops_arbitrary_internal_keys(stray_key):
     cleaned = _make_litellm()._clean_conf({stray_key: "x", "top_p": 0.9})
@@ -100,12 +114,20 @@ def test_litellm_preserves_known_generation_params():
 
 
 def test_litellm_preserves_thinking_param():
-    """``thinking`` is injected by the model-family policy for reasoning
-    models and must survive the whitelist (it is a valid LiteLLM param)."""
+    """``thinking`` is a valid LiteLLM parameter even without a provider policy."""
     cleaned = _make_litellm()._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0})
     assert cleaned["thinking"] == {"type": "enabled"}
 
 
+def test_litellm_preserves_provider_mapped_thinking_param():
+    """Provider-mapped ``thinking`` must survive the LiteLLM whitelist."""
+    cleaned = _make_litellm(
+        "kimi-k2.6-preview",
+        SupportedLiteLLMProvider.Moonshot,
+    )._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0})
+    assert cleaned["thinking"] == {"type": "enabled"}
+
+
 def test_max_tokens_is_dropped_on_both_backends():
     assert "max_tokens" not in _make_litellm()._clean_conf({"max_tokens": 100, "temperature": 0.3})
     assert "max_tokens" not in _make_base()._clean_conf({"max_tokens": 100, "temperature": 0.3})
diff --git a/web/src/components/llm-setting-items/next.tsx b/web/src/components/llm-setting-items/next.tsx
index 66344a4cf8..8c93c30319 100644
--- a/web/src/components/llm-setting-items/next.tsx
+++ b/web/src/components/llm-setting-items/next.tsx
@@ -38,6 +38,7 @@ interface LlmSettingFieldItemsProps {
     | 'presence_penalty'
     | 'frequency_penalty'
     | 'max_tokens'
+    | 'thinking'
   >;
   showCollapse?: boolean;
 }
@@ -61,6 +62,7 @@ export const LlmSettingFieldSchema = {
   frequency_penalty: z.coerce.number().optional(),
   max_tokens: z.number().optional(),
   parameter: z.string().optional(),
+  thinking: z.enum(['default', 'enabled', 'disabled']).optional(),
 };
 
 export const LlmSettingSchema = {
@@ -80,6 +82,7 @@ export function LlmSettingFieldItems({
     'presence_penalty',
     'frequency_penalty',
     'max_tokens',
+    'thinking',
   ],
   llmId,
   showCollapse = false,
@@ -249,6 +252,41 @@ export function LlmSettingFieldItems({
               }}
             ></SliderInputSwitchFormField>
           )}
+          {showFields.some((item) => item === 'thinking') && (
+            <FormField
+              control={form.control}
+              name={getFieldWithPrefix('thinking')}
+              render={({ field }) => (
+                <FormItem className="flex justify-between items-center">
+                  <FormLabel className="flex-1" tooltip={t('thinkingTip')}>
+                    {t('thinking')}
+                  </FormLabel>
+                  <FormControl>
+                    <Select
+                      value={field.value ?? 'default'}
+                      onValueChange={field.onChange}
+                    >
+                      <SelectTrigger className="flex-1 !m-0">
+                        <SelectValue />
+                      </SelectTrigger>
+                      <SelectContent>
+                        <SelectItem value="default">
+                          {t('thinkingDefault')}
+                        </SelectItem>
+                        <SelectItem value="enabled">
+                          {t('thinkingEnabled')}
+                        </SelectItem>
+                        <SelectItem value="disabled">
+                          {t('thinkingDisabled')}
+                        </SelectItem>
+                      </SelectContent>
+                    </Select>
+                  </FormControl>
+                  <FormMessage />
+                </FormItem>
+              )}
+            />
+          )}
         </section>
       </CollapseComponent>
     </div>
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 01517b9ed8..19b5a6ad14 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -1056,6 +1056,12 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
       maxTokensTip: `The maximum context size of the model; an invalid or incorrect value will cause an error. Defaults to 512.`,
       maxTokensInvalidMessage: 'Please enter a valid number for Max tokens.',
       maxTokensMinMessage: 'Max tokens cannot be less than 0.',
+      thinking: 'Thinking',
+      thinkingDefault: 'System default',
+      thinkingEnabled: 'Enabled',
+      thinkingDisabled: 'Disabled',
+      thinkingTip:
+        'Only controls thinking mode for official Qwen, Kimi, and GLM model providers. System default disables Qwen thinking to avoid long-running tasks.',
       quote: 'Show quote',
       quoteTip: 'Whether to display the original text as a reference.',
       selfRag: 'Self-RAG',
diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts
index 20d2907003..6f9e29bf52 100644
--- a/web/src/locales/zh-traditional.ts
+++ b/web/src/locales/zh-traditional.ts
@@ -506,6 +506,11 @@ export default {
       maxTokensTip: `模型的最大上下文大小；無效或不正確的值會導致錯誤。預設為 512。`,
       maxTokensInvalidMessage: '請輸入有效的最大標記數。',
       maxTokensMinMessage: '最大標記數不能小於 0。',
+      thinking: '思考',
+      thinkingDefault: '系統預設',
+      thinkingEnabled: '開啟',
+      thinkingDisabled: '關閉',
+      thinkingTip: '僅控制官方模型提供商中的 Qwen、Kimi 和 GLM 模型思考模式。系統預設會關閉 Qwen 思考，以避免任務長時間執行。',
       quote: '顯示引文',
       quoteTip: '是否應該顯示原文出處？',
       selfRag: 'Self-RAG',
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index ce1d0c868c..ddedd69359 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -957,6 +957,11 @@ NER：使用 spaCy NER 和基于规则的关键词提取来抽取实体和关系
       maxTokensTip: `模型的最大上下文大小；无效或不正确的值会导致错误。默认值为 512。`,
       maxTokensInvalidMessage: '请输入有效的最大令牌数。',
       maxTokensMinMessage: '最大令牌数不能小于 0。',
+      thinking: '思考',
+      thinkingDefault: '系统默认',
+      thinkingEnabled: '开启',
+      thinkingDisabled: '关闭',
+      thinkingTip: '仅控制官方模型提供商中的 Qwen、Kimi 和 GLM 模型思考模式。系统默认会关闭 Qwen 思考，以避免任务长时间运行。',
       quote: '显示引文',
       quoteTip: '是否应该显示原文出处？',
       selfRag: 'Self-RAG',