Feat/agent thinking switch (#15446)

### What problem does this PR solve?

This PR adds an Agent LLM setting to control thinking mode for official providers that expose a thinking switch. Related to #12842. Closes #15445.

Some providers expose thinking controls through provider-specific request fields, but Agent LLM settings did not have a unified option for users to enable or disable thinking mode.

This PR adds a `Thinking` selector with:

- System default
- Enabled
- Disabled

<img width="452" height="278" alt="8566b0b4-0546-4c8a-913d-f9bbd38319f6" src="https://github.com/user-attachments/assets/25b497f7-1ba0-4bfe-940d-6fe79287d6ab" />
<img width="471" height="971" alt="8a0a6bee-f45f-48d5-bd83-17af260de3db" src="https://github.com/user-attachments/assets/41ad43c1-5087-48f1-bf37-f2ca14c2be2f" />

Initial support is limited to the verified official providers:

- Qwen / DashScope: `enable_thinking`
- Kimi / Moonshot: `thinking.type`
- GLM / ZHIPU-AI: `thinking.type`

For LiteLLM-based providers, provider-specific fields are forwarded through `extra_body` before `drop_params` filtering so the request parameters are preserved.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: jiashi <jiashi19@outlook.com>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
2026-06-29 15:31:05 +08:00 · 2026-06-28 12:02:55 +08:00
parent 6a4de82a80
commit 0d7ad0ed0c
9 changed files with 367 additions and 29 deletions
--- a/test/unit_test/rag/llm/conftest.py
+++ b/test/unit_test/rag/llm/conftest.py
@@ -27,6 +27,7 @@ works without triggering the full init.
 import os
 import sys
 import types
+from enum import StrEnum

 # Resolve the real path to rag/llm/ so sub-module imports can find files
 _RAGFLOW_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
@@ -55,6 +56,19 @@ def _install_rag_llm_stub():
    llm_pkg.Seq2txtModel = {}
    llm_pkg.TTSModel = {}
    llm_pkg.OcrModel = {}
+
+    class SupportedLiteLLMProvider(StrEnum):  # stand-in enum attached to the stub rag.llm package below
+        Tongyi_Qianwen = "Tongyi-Qianwen"  # presumably mirrors the real rag.llm member values; verify when they change
+        Dashscope = "Dashscope"
+        Moonshot = "Moonshot"
+        ZHIPU_AI = "ZHIPU-AI"
+        OpenAI = "OpenAI"
+        Azure_OpenAI = "Azure-OpenAI"
+        HunYuan = "Tencent Hunyuan"  # member name and string value differ here
+
+    llm_pkg.SupportedLiteLLMProvider = SupportedLiteLLMProvider
+    llm_pkg.FACTORY_DEFAULT_BASE_URL = {}
+    llm_pkg.LITELLM_PROVIDER_PREFIX = {}
    sys.modules["rag.llm"] = llm_pkg


--- a/test/unit_test/rag/llm/test_chat_model_thinking_policy.py
+++ b/test/unit_test/rag/llm/test_chat_model_thinking_policy.py
@@ -0,0 +1,163 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import pytest
+
+from rag.llm import SupportedLiteLLMProvider
+from rag.llm.chat_model import _apply_model_family_policies, _move_litellm_provider_body_fields
+
+pytestmark = pytest.mark.p1
+
+
+def test_qwen3_uses_system_disabled_default():  # no "thinking" key supplied -> thinking is turned off
+    gen_conf, kwargs = _apply_model_family_policies(
+        "qwen3-plus",
+        backend="base",
+        gen_conf={},  # empty: the caller states no thinking preference
+        request_kwargs={},
+    )
+
+    assert gen_conf == {}  # gen_conf comes back empty
+    assert kwargs["extra_body"]["enable_thinking"] is False  # the switch is delivered through extra_body
+
+
+def test_qwen3_can_enable_thinking_explicitly():  # explicit "enabled" is honoured on the base backend
+    gen_conf, kwargs = _apply_model_family_policies(
+        "qwen3-plus",
+        backend="base",
+        gen_conf={"thinking": "enabled", "temperature": 0.2},
+        request_kwargs={"extra_body": {"seed": 1}},  # pre-existing extra_body entry that must survive
+    )
+
+    assert gen_conf == {"temperature": 0.2}  # "thinking" is consumed; the other setting stays
+    assert kwargs["extra_body"] == {"seed": 1, "enable_thinking": True}  # merged with, not replacing, "seed"
+
+
+@pytest.mark.parametrize(
+    "provider",
+    [SupportedLiteLLMProvider.Tongyi_Qianwen, SupportedLiteLLMProvider.Dashscope],  # same expectation for both providers
+)
+def test_qwen3_litellm_provider_uses_provider_field(provider):  # litellm backend: the flag lands in gen_conf
+    gen_conf, kwargs = _apply_model_family_policies(
+        "qwen3-max",
+        backend="litellm",
+        provider=provider,
+        gen_conf={"thinking": "disabled"},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}  # request kwargs stay empty, i.e. no extra_body is created here
+    assert gen_conf["enable_thinking"] is False  # "disabled" becomes enable_thinking=False
+
+
+def test_kimi_thinking_maps_to_moonshot_payload():  # string setting becomes a {"type": ...} dict
+    gen_conf, kwargs = _apply_model_family_policies(
+        "kimi-k2.6-preview",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.Moonshot,
+        gen_conf={"thinking": "disabled", "temperature": 0.6},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}  # request kwargs stay empty
+    assert gen_conf["thinking"] == {"type": "disabled"}  # "disabled" -> {"type": "disabled"}
+    assert "temperature" not in gen_conf  # the caller-supplied temperature is expected to be removed
+
+
+def test_moonshot_explicit_thinking_does_not_require_exact_kimi_model_name():  # mapping also applies to an alias name
+    gen_conf, kwargs = _apply_model_family_policies(
+        "kimi-latest",  # model name carrying no version number
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.Moonshot,
+        gen_conf={"thinking": "disabled"},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}  # request kwargs stay empty
+    assert gen_conf["thinking"] == {"type": "disabled"}  # explicit setting is still mapped to the dict form
+
+
+def test_kimi_keeps_provider_default_when_unspecified():  # no "thinking" key supplied -> none is injected
+    gen_conf, kwargs = _apply_model_family_policies(
+        "kimi-k2.5-preview",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.Moonshot,
+        gen_conf={"temperature": 0.6},  # only a temperature is supplied by the caller
+        request_kwargs={},
+    )
+
+    assert kwargs == {}  # request kwargs stay empty
+    assert "thinking" not in gen_conf  # no thinking field is added
+    assert "temperature" not in gen_conf  # the caller-supplied temperature is expected to be removed
+    assert gen_conf["top_p"] == 0.95  # this and the values below were not in the input; the policy fills them in
+    assert gen_conf["n"] == 1
+    assert gen_conf["presence_penalty"] == 0.0
+    assert gen_conf["frequency_penalty"] == 0.0
+
+
+def test_glm_keeps_provider_default_when_unspecified():  # empty gen_conf passes through unchanged
+    gen_conf, kwargs = _apply_model_family_policies(
+        "glm-4.7",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.ZHIPU_AI,
+        gen_conf={},  # empty: the caller states no thinking preference
+        request_kwargs={},
+    )
+
+    assert kwargs == {}  # request kwargs stay empty
+    assert gen_conf == {}  # nothing is injected, including any thinking field
+
+
+def test_glm_thinking_maps_to_zhipu_payload():  # string setting becomes a {"type": ...} dict
+    gen_conf, kwargs = _apply_model_family_policies(
+        "glm-4.7",
+        backend="litellm",
+        provider=SupportedLiteLLMProvider.ZHIPU_AI,
+        gen_conf={"thinking": "enabled"},
+        request_kwargs={},
+    )
+
+    assert kwargs == {}  # request kwargs stay empty
+    assert gen_conf["thinking"] == {"type": "enabled"}  # "enabled" -> {"type": "enabled"}
+
+
+def test_litellm_provider_body_fields_move_to_extra_body_before_drop_params():  # provider-only field is relocated
+    completion_args = {
+        "model": "kimi-latest",
+        "messages": [],
+        "thinking": {"type": "disabled"},  # provider-specific field expected to move
+        "temperature": 0.2,  # ordinary parameter expected to stay at the top level
+    }
+
+    _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Moonshot, completion_args)  # return value unused; the dict itself is inspected
+
+    assert completion_args["extra_body"]["thinking"] == {"type": "disabled"}  # an extra_body entry now holds the field
+    assert "thinking" not in completion_args  # and it is gone from the top level
+    assert completion_args["temperature"] == 0.2
+
+
+def test_litellm_provider_body_fields_preserve_existing_extra_body():  # relocation merges into an existing extra_body
+    completion_args = {
+        "model": "qwen3-max",
+        "messages": [],
+        "enable_thinking": False,  # provider-specific field expected to move
+        "extra_body": {"seed": 1},  # pre-existing entry that must not be lost
+    }
+
+    _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Tongyi_Qianwen, completion_args)  # return value unused; the dict itself is inspected
+
+    assert completion_args["extra_body"] == {"seed": 1, "enable_thinking": False}  # "seed" kept, flag added
+    assert "enable_thinking" not in completion_args  # the flag is gone from the top level
--- a/test/unit_test/rag/llm/test_clean_conf_whitelist.py
+++ b/test/unit_test/rag/llm/test_clean_conf_whitelist.py
@@ -31,8 +31,15 @@ These tests pin the whitelisting behaviour for both backends so the leak
 cannot reappear.
 """

+import sys
+from unittest.mock import MagicMock
+
 import pytest

+if isinstance(sys.modules.get("rag.llm.chat_model"), MagicMock):  # a MagicMock may already sit under this module name (presumably left by another test module)
+    del sys.modules["rag.llm.chat_model"]  # remove it so the chat_model import below is resolved afresh
+
+from rag.llm import SupportedLiteLLMProvider
 from rag.llm.chat_model import (
    ALLOWED_GEN_CONF_KEYS,
    LITELLM_ALLOWED_GEN_CONF_KEYS,
@@ -78,6 +85,13 @@ def test_base_drops_model_type():
    assert cleaned["temperature"] == 0.5


+def test_base_drops_litellm_reasoning_controls():  # base backend's _clean_conf strips both reasoning keys
+    cleaned = _make_base()._clean_conf({"temperature": 0.5, "thinking": {"type": "enabled"}, "enable_thinking": True})
+    assert "thinking" not in cleaned  # dict-style control is removed
+    assert "enable_thinking" not in cleaned  # boolean-style control is removed
+    assert cleaned["temperature"] == 0.5  # the ordinary generation parameter is kept
+
+
@pytest.mark.parametrize("stray_key", ["model_type", "llm_id", "parameter", "icon", "foo"])
 def test_litellm_drops_arbitrary_internal_keys(stray_key):
    cleaned = _make_litellm()._clean_conf({stray_key: "x", "top_p": 0.9})
@@ -100,12 +114,20 @@ def test_litellm_preserves_known_generation_params():


 def test_litellm_preserves_thinking_param():
-    """``thinking`` is injected by the model-family policy for reasoning
-    models and must survive the whitelist (it is a valid LiteLLM param)."""
+    """``thinking`` is a valid LiteLLM parameter even without a provider policy."""
    cleaned = _make_litellm()._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0})
    assert cleaned["thinking"] == {"type": "enabled"}


+def test_litellm_preserves_provider_mapped_thinking_param():
+    """Provider-mapped ``thinking`` must survive the LiteLLM whitelist."""
+    cleaned = _make_litellm(
+        "kimi-k2.6-preview",  # positional arguments to the _make_litellm helper, which is defined outside this hunk
+        SupportedLiteLLMProvider.Moonshot,
+    )._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0})
+    assert cleaned["thinking"] == {"type": "enabled"}  # the dict value comes through unchanged
+
+
 def test_max_tokens_is_dropped_on_both_backends():
    assert "max_tokens" not in _make_litellm()._clean_conf({"max_tokens": 100, "temperature": 0.3})
    assert "max_tokens" not in _make_base()._clean_conf({"max_tokens": 100, "temperature": 0.3})