mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Feat/agent thinking switch (#15446)
### What problem does this PR solve?

This PR adds an Agent LLM setting to control thinking mode for official providers that expose a thinking switch.

Related to #12842. Closes #15445.

Some providers expose thinking controls through provider-specific request fields, but Agent LLM settings did not have a unified option for users to enable or disable thinking mode.

This PR adds a `Thinking` selector with:

- System default
- Enabled
- Disabled

<img width="452" height="278" alt="8566b0b4-0546-4c8a-913d-f9bbd38319f6" src="https://github.com/user-attachments/assets/25b497f7-1ba0-4bfe-940d-6fe79287d6ab" />
<img width="471" height="971" alt="8a0a6bee-f45f-48d5-bd83-17af260de3db" src="https://github.com/user-attachments/assets/41ad43c1-5087-48f1-bf37-f2ca14c2be2f" />

Initial support is limited to the verified official providers:

- Qwen / DashScope: `enable_thinking`
- Kimi / Moonshot: `thinking.type`
- GLM / ZHIPU-AI: `thinking.type`

For LiteLLM-based providers, provider-specific fields are forwarded through `extra_body` before `drop_params` filtering so the request parameters are preserved.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: jiashi <jiashi19@outlook.com>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
This commit is contained in:
@@ -49,6 +49,7 @@ class LLMParam(ComponentParamBase):
|
||||
self.output_structure = None
|
||||
self.cite = True
|
||||
self.visual_files_var = None
|
||||
self.thinking = ""
|
||||
|
||||
def check(self):
|
||||
self.check_decimal_float(float(self.temperature), "[Agent] Temperature")
|
||||
@@ -77,6 +78,8 @@ class LLMParam(ComponentParamBase):
|
||||
conf["presence_penalty"] = float(self.presence_penalty)
|
||||
if float(self.frequency_penalty) > 0 and get_attr("frequencyPenaltyEnabled"):
|
||||
conf["frequency_penalty"] = float(self.frequency_penalty)
|
||||
if get_attr("thinking") in {"enabled", "disabled"}:
|
||||
conf["thinking"] = get_attr("thinking")
|
||||
return conf
|
||||
|
||||
|
||||
|
||||
@@ -95,10 +95,12 @@ ALLOWED_GEN_CONF_KEYS = frozenset(
|
||||
|
||||
# LiteLLM additionally understands reasoning-control parameters that the
|
||||
# model-family policies may inject into `gen_conf` (e.g. `thinking` for
|
||||
# Anthropic / Kimi reasoning models, `reasoning_effort` for OpenAI o-series).
|
||||
# Anthropic / Kimi reasoning models, `enable_thinking` for Qwen models,
|
||||
# `reasoning_effort` for OpenAI o-series).
|
||||
LITELLM_ALLOWED_GEN_CONF_KEYS = ALLOWED_GEN_CONF_KEYS | frozenset(
|
||||
{
|
||||
"thinking",
|
||||
"enable_thinking",
|
||||
"reasoning_effort",
|
||||
"extra_body",
|
||||
}
|
||||
@@ -117,9 +119,43 @@ def _apply_model_family_policies(
|
||||
sanitized_gen_conf = deepcopy(gen_conf) if gen_conf else {}
|
||||
sanitized_kwargs = dict(request_kwargs) if request_kwargs else {}
|
||||
|
||||
# Qwen3 family disables thinking by extra_body on non-stream chat requests.
|
||||
def _thinking_type():
|
||||
val = sanitized_gen_conf.get("thinking")
|
||||
if isinstance(val, dict):
|
||||
val = val.get("type")
|
||||
|
||||
enable_thinking = sanitized_gen_conf.get("enable_thinking")
|
||||
|
||||
if isinstance(val, str) and val in {"enabled", "disabled"}:
|
||||
return val
|
||||
if isinstance(enable_thinking, bool):
|
||||
return "enabled" if enable_thinking else "disabled"
|
||||
return None
|
||||
|
||||
def _pop_thinking_controls():
|
||||
sanitized_gen_conf.pop("thinking", None)
|
||||
sanitized_gen_conf.pop("enable_thinking", None)
|
||||
|
||||
def _merge_extra_body(target: dict, extra: dict) -> None:
|
||||
body = target.get("extra_body")
|
||||
if not isinstance(body, dict):
|
||||
body = {}
|
||||
body.update(extra)
|
||||
target["extra_body"] = body
|
||||
|
||||
thinking_type = _thinking_type()
|
||||
|
||||
# Qwen3 keeps RAGFlow's system default of disabling thinking unless explicitly overridden.
|
||||
if "qwen3" in model_name_lower:
|
||||
sanitized_kwargs["extra_body"] = {"enable_thinking": False}
|
||||
_pop_thinking_controls()
|
||||
enable_thinking = thinking_type == "enabled" if thinking_type else False
|
||||
if backend == "litellm" and provider in {
|
||||
SupportedLiteLLMProvider.Tongyi_Qianwen,
|
||||
SupportedLiteLLMProvider.Dashscope,
|
||||
}:
|
||||
sanitized_gen_conf["enable_thinking"] = enable_thinking
|
||||
else:
|
||||
_merge_extra_body(sanitized_kwargs, {"enable_thinking": enable_thinking})
|
||||
|
||||
if backend == "base":
|
||||
return sanitized_gen_conf, sanitized_kwargs
|
||||
@@ -137,27 +173,50 @@ def _apply_model_family_policies(
|
||||
if provider == SupportedLiteLLMProvider.HunYuan:
|
||||
for key in ("presence_penalty", "frequency_penalty"):
|
||||
sanitized_gen_conf.pop(key, None)
|
||||
elif "kimi-k2.5" in model_name_lower or "kimi-k2.6" in model_name_lower:
|
||||
reasoning = sanitized_gen_conf.pop("reasoning", None)
|
||||
thinking = {"type": "enabled"}
|
||||
if reasoning is not None:
|
||||
thinking = {"type": "enabled"} if reasoning else {"type": "disabled"}
|
||||
elif not isinstance(thinking, dict) or thinking.get("type") not in {"enabled", "disabled"}:
|
||||
thinking = {"type": "disabled"}
|
||||
sanitized_gen_conf["thinking"] = thinking
|
||||
elif provider == SupportedLiteLLMProvider.Moonshot:
|
||||
if thinking_type:
|
||||
_pop_thinking_controls()
|
||||
sanitized_gen_conf["thinking"] = {"type": thinking_type}
|
||||
|
||||
thinking_enabled = thinking.get("type") == "enabled"
|
||||
sanitized_gen_conf["temperature"] = 1.0 if thinking_enabled else 0.6
|
||||
sanitized_gen_conf["top_p"] = 0.95
|
||||
sanitized_gen_conf["n"] = 1
|
||||
sanitized_gen_conf["presence_penalty"] = 0.0
|
||||
sanitized_gen_conf["frequency_penalty"] = 0.0
|
||||
if thinking_type or "kimi-k2.5" in model_name_lower or "kimi-k2.6" in model_name_lower:
|
||||
sanitized_gen_conf.pop("temperature", None)
|
||||
sanitized_gen_conf["top_p"] = 0.95
|
||||
sanitized_gen_conf["n"] = 1
|
||||
sanitized_gen_conf["presence_penalty"] = 0.0
|
||||
sanitized_gen_conf["frequency_penalty"] = 0.0
|
||||
elif (
|
||||
provider == SupportedLiteLLMProvider.ZHIPU_AI
|
||||
and "glm" in model_name_lower
|
||||
and thinking_type
|
||||
):
|
||||
_pop_thinking_controls()
|
||||
sanitized_gen_conf["thinking"] = {"type": thinking_type}
|
||||
|
||||
return sanitized_gen_conf, sanitized_kwargs
|
||||
|
||||
return sanitized_gen_conf, sanitized_kwargs
|
||||
|
||||
|
||||
def _move_litellm_provider_body_fields(provider: SupportedLiteLLMProvider | str | None, completion_args: dict) -> dict:
    """Relocate provider-specific request fields into ``extra_body``.

    For the providers listed below, the named top-level keys of
    ``completion_args`` are popped and stored under
    ``completion_args["extra_body"]``. Entries already present in an
    existing ``extra_body`` dict are kept.

    Args:
        provider: Provider identifier used to look up which keys to move.
            Unknown providers (including ``None``) move nothing.
        completion_args: Completion call arguments; modified in place.

    Returns:
        The same ``completion_args`` dict that was passed in.
    """
    fields_by_provider = {
        SupportedLiteLLMProvider.Tongyi_Qianwen: {"enable_thinking"},
        SupportedLiteLLMProvider.Dashscope: {"enable_thinking"},
        SupportedLiteLLMProvider.Moonshot: {"thinking"},
        SupportedLiteLLMProvider.ZHIPU_AI: {"thinking"},
    }

    # Reuse the caller's extra_body when it is a dict; anything else is
    # treated as absent and replaced only if there is something to store.
    existing = completion_args.get("extra_body")
    extra_body = existing if isinstance(existing, dict) else {}

    present = [name for name in fields_by_provider.get(provider, set()) if name in completion_args]
    for name in present:
        extra_body[name] = completion_args.pop(name)

    if present or extra_body:
        completion_args["extra_body"] = extra_body
    return completion_args
|
||||
|
||||
class Base(ABC):
|
||||
def __init__(self, key, model_name, base_url, **kwargs):
|
||||
timeout = int(os.environ.get("LLM_TIMEOUT_SECONDS", 600))
|
||||
@@ -197,12 +256,6 @@ class Base(ABC):
|
||||
return LLMErrorCode.ERROR_GENERIC
|
||||
|
||||
def _clean_conf(self, gen_conf):
|
||||
gen_conf, _ = _apply_model_family_policies(
|
||||
self.model_name,
|
||||
backend="base",
|
||||
gen_conf=gen_conf,
|
||||
)
|
||||
|
||||
if "max_tokens" in gen_conf:
|
||||
del gen_conf["max_tokens"]
|
||||
|
||||
@@ -213,10 +266,17 @@ class Base(ABC):
|
||||
logging.info("[HISTORY STREAMLY]" + json.dumps(history, ensure_ascii=False, indent=4))
|
||||
reasoning_start = False
|
||||
|
||||
gen_conf, extra_request_kwargs = _apply_model_family_policies(
|
||||
self.model_name,
|
||||
backend="base",
|
||||
gen_conf=gen_conf,
|
||||
request_kwargs={},
|
||||
)
|
||||
request_kwargs = {"model": self.model_name, "messages": history, "stream": True, **gen_conf}
|
||||
stop = kwargs.get("stop")
|
||||
if stop:
|
||||
request_kwargs["stop"] = stop
|
||||
request_kwargs.update(extra_request_kwargs)
|
||||
|
||||
response = await self.async_client.chat.completions.create(**request_kwargs)
|
||||
async for resp in response:
|
||||
@@ -407,6 +467,12 @@ class Base(ABC):
|
||||
async def async_chat_with_tools(self, system: str, history: list, gen_conf: dict | None = None):
|
||||
gen_conf = dict(gen_conf or {})
|
||||
gen_conf = self._clean_conf(gen_conf)
|
||||
gen_conf, extra_request_kwargs = _apply_model_family_policies(
|
||||
self.model_name,
|
||||
backend="base",
|
||||
gen_conf=gen_conf,
|
||||
request_kwargs={},
|
||||
)
|
||||
if system and history and history[0].get("role") != "system":
|
||||
history.insert(0, {"role": "system", "content": system})
|
||||
|
||||
@@ -418,7 +484,7 @@ class Base(ABC):
|
||||
try:
|
||||
for _ in range(self.max_rounds + 1):
|
||||
logging.info(f"{self.tools=}")
|
||||
response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
|
||||
response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf, **extra_request_kwargs)
|
||||
tk_count += total_token_count_from_response(response)
|
||||
if not response.choices or not response.choices[0].message:
|
||||
raise Exception(f"500 response structure error. Response: {response}")
|
||||
@@ -473,6 +539,12 @@ class Base(ABC):
|
||||
async def async_chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict | None = None):
|
||||
gen_conf = dict(gen_conf or {})
|
||||
gen_conf = self._clean_conf(gen_conf)
|
||||
gen_conf, extra_request_kwargs = _apply_model_family_policies(
|
||||
self.model_name,
|
||||
backend="base",
|
||||
gen_conf=gen_conf,
|
||||
request_kwargs={},
|
||||
)
|
||||
tools = self.tools
|
||||
if system and history and history[0].get("role") != "system":
|
||||
history.insert(0, {"role": "system", "content": system})
|
||||
@@ -487,7 +559,7 @@ class Base(ABC):
|
||||
reasoning_start = False
|
||||
logging.info(f"[ToolLoop] round={_round} model={self.model_name} tools={[t['function']['name'] for t in tools]}")
|
||||
|
||||
response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
|
||||
response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf, **extra_request_kwargs)
|
||||
|
||||
final_tool_calls = {}
|
||||
answer = ""
|
||||
@@ -573,7 +645,15 @@ class Base(ABC):
|
||||
logging.warning(f"Exceed max rounds: {self.max_rounds}")
|
||||
history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
|
||||
|
||||
response = await self.async_client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
|
||||
response = await self.async_client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=history,
|
||||
stream=True,
|
||||
tools=tools,
|
||||
tool_choice="auto",
|
||||
**gen_conf,
|
||||
**extra_request_kwargs,
|
||||
)
|
||||
|
||||
async for resp in response:
|
||||
if not hasattr(resp, "choices") or not resp.choices:
|
||||
@@ -619,9 +699,10 @@ class Base(ABC):
|
||||
|
||||
return final_ans.strip(), tol_token
|
||||
|
||||
_, kwargs = _apply_model_family_policies(
|
||||
gen_conf, kwargs = _apply_model_family_policies(
|
||||
self.model_name,
|
||||
backend="base",
|
||||
gen_conf=gen_conf,
|
||||
request_kwargs=kwargs,
|
||||
)
|
||||
|
||||
@@ -2080,6 +2161,7 @@ class LiteLLMBase(ABC):
|
||||
api_base = completion_args.get("api_base", self.base_url)
|
||||
separator = "&" if "?" in api_base else "?"
|
||||
completion_args["api_base"] = f"{api_base}{separator}GroupId={self.group_id}"
|
||||
_move_litellm_provider_body_fields(self.provider, completion_args)
|
||||
if extra_headers:
|
||||
completion_args["extra_headers"] = extra_headers
|
||||
return completion_args
|
||||
|
||||
@@ -27,6 +27,7 @@ works without triggering the full init.
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from enum import StrEnum
|
||||
|
||||
# Resolve the real path to rag/llm/ so sub-module imports can find files
|
||||
_RAGFLOW_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
@@ -55,6 +56,19 @@ def _install_rag_llm_stub():
|
||||
llm_pkg.Seq2txtModel = {}
|
||||
llm_pkg.TTSModel = {}
|
||||
llm_pkg.OcrModel = {}
|
||||
|
||||
class SupportedLiteLLMProvider(StrEnum):
|
||||
Tongyi_Qianwen = "Tongyi-Qianwen"
|
||||
Dashscope = "Dashscope"
|
||||
Moonshot = "Moonshot"
|
||||
ZHIPU_AI = "ZHIPU-AI"
|
||||
OpenAI = "OpenAI"
|
||||
Azure_OpenAI = "Azure-OpenAI"
|
||||
HunYuan = "Tencent Hunyuan"
|
||||
|
||||
llm_pkg.SupportedLiteLLMProvider = SupportedLiteLLMProvider
|
||||
llm_pkg.FACTORY_DEFAULT_BASE_URL = {}
|
||||
llm_pkg.LITELLM_PROVIDER_PREFIX = {}
|
||||
sys.modules["rag.llm"] = llm_pkg
|
||||
|
||||
|
||||
|
||||
163
test/unit_test/rag/llm/test_chat_model_thinking_policy.py
Normal file
163
test/unit_test/rag/llm/test_chat_model_thinking_policy.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#
|
||||
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import pytest
|
||||
|
||||
from rag.llm import SupportedLiteLLMProvider
|
||||
from rag.llm.chat_model import _apply_model_family_policies, _move_litellm_provider_body_fields
|
||||
|
||||
pytestmark = pytest.mark.p1
|
||||
|
||||
|
||||
def test_qwen3_uses_system_disabled_default():
    """Without a thinking setting, Qwen3 on the base backend gets ``enable_thinking=False``."""
    cleaned_conf, request_extras = _apply_model_family_policies(
        "qwen3-plus",
        backend="base",
        gen_conf={},
        request_kwargs={},
    )

    # The switch travels in the request kwargs; gen_conf stays empty.
    assert cleaned_conf == {}
    assert request_extras["extra_body"]["enable_thinking"] is False
|
||||
|
||||
|
||||
def test_qwen3_can_enable_thinking_explicitly():
    """An explicit ``thinking="enabled"`` turns Qwen3 thinking on and keeps existing ``extra_body`` entries."""
    user_conf = {"thinking": "enabled", "temperature": 0.2}
    caller_kwargs = {"extra_body": {"seed": 1}}

    cleaned_conf, request_extras = _apply_model_family_policies(
        "qwen3-plus",
        backend="base",
        gen_conf=user_conf,
        request_kwargs=caller_kwargs,
    )

    # The thinking switch is consumed; only real generation params remain.
    assert cleaned_conf == {"temperature": 0.2}
    assert request_extras["extra_body"] == {"seed": 1, "enable_thinking": True}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "provider",
    [
        SupportedLiteLLMProvider.Tongyi_Qianwen,
        SupportedLiteLLMProvider.Dashscope,
    ],
)
def test_qwen3_litellm_provider_uses_provider_field(provider):
    """On the LiteLLM backend, Qwen providers carry the switch as ``enable_thinking`` in gen_conf."""
    cleaned_conf, request_extras = _apply_model_family_policies(
        "qwen3-max",
        backend="litellm",
        provider=provider,
        gen_conf={"thinking": "disabled"},
        request_kwargs={},
    )

    # Nothing is added to the request kwargs for these providers.
    assert request_extras == {}
    assert cleaned_conf["enable_thinking"] is False
|
||||
|
||||
|
||||
def test_kimi_thinking_maps_to_moonshot_payload():
    """A string thinking setting becomes Moonshot's ``{"type": ...}`` payload and temperature is dropped."""
    user_conf = {"thinking": "disabled", "temperature": 0.6}

    cleaned_conf, request_extras = _apply_model_family_policies(
        "kimi-k2.6-preview",
        backend="litellm",
        provider=SupportedLiteLLMProvider.Moonshot,
        gen_conf=user_conf,
        request_kwargs={},
    )

    assert request_extras == {}
    assert cleaned_conf["thinking"] == {"type": "disabled"}
    assert "temperature" not in cleaned_conf
|
||||
|
||||
|
||||
def test_moonshot_explicit_thinking_does_not_require_exact_kimi_model_name():
    """An explicit thinking setting is mapped for a Moonshot model name other than kimi-k2.5 / kimi-k2.6."""
    cleaned_conf, request_extras = _apply_model_family_policies(
        "kimi-latest",
        backend="litellm",
        provider=SupportedLiteLLMProvider.Moonshot,
        gen_conf={"thinking": "disabled"},
        request_kwargs={},
    )

    assert request_extras == {}
    assert cleaned_conf["thinking"] == {"type": "disabled"}
|
||||
|
||||
|
||||
def test_kimi_keeps_provider_default_when_unspecified():
    """Without a thinking setting, no ``thinking`` key is sent for Kimi and the fixed sampling params apply."""
    cleaned_conf, request_extras = _apply_model_family_policies(
        "kimi-k2.5-preview",
        backend="litellm",
        provider=SupportedLiteLLMProvider.Moonshot,
        gen_conf={"temperature": 0.6},
        request_kwargs={},
    )

    assert request_extras == {}

    # Neither the thinking switch nor the caller's temperature is forwarded.
    assert "thinking" not in cleaned_conf
    assert "temperature" not in cleaned_conf

    # Sampling parameters pinned by the policy.
    assert cleaned_conf["top_p"] == 0.95
    assert cleaned_conf["n"] == 1
    assert cleaned_conf["presence_penalty"] == 0.0
    assert cleaned_conf["frequency_penalty"] == 0.0
|
||||
|
||||
|
||||
def test_glm_keeps_provider_default_when_unspecified():
    """Without a thinking setting, a GLM request is left untouched."""
    cleaned_conf, request_extras = _apply_model_family_policies(
        "glm-4.7",
        backend="litellm",
        provider=SupportedLiteLLMProvider.ZHIPU_AI,
        gen_conf={},
        request_kwargs={},
    )

    assert request_extras == {}
    assert cleaned_conf == {}
|
||||
|
||||
|
||||
def test_glm_thinking_maps_to_zhipu_payload():
    """A string thinking setting becomes the ``{"type": ...}`` payload for ZHIPU-AI GLM models."""
    cleaned_conf, request_extras = _apply_model_family_policies(
        "glm-4.7",
        backend="litellm",
        provider=SupportedLiteLLMProvider.ZHIPU_AI,
        gen_conf={"thinking": "enabled"},
        request_kwargs={},
    )

    assert request_extras == {}
    assert cleaned_conf["thinking"] == {"type": "enabled"}
|
||||
|
||||
|
||||
def test_litellm_provider_body_fields_move_to_extra_body_before_drop_params():
    """Moonshot's ``thinking`` key is moved under ``extra_body``; other arguments stay at the top level."""
    call_args = {
        "model": "kimi-latest",
        "messages": [],
        "thinking": {"type": "disabled"},
        "temperature": 0.2,
    }

    # The helper edits the dict in place, so the return value is not needed.
    _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Moonshot, call_args)

    assert call_args["extra_body"]["thinking"] == {"type": "disabled"}
    assert "thinking" not in call_args
    assert call_args["temperature"] == 0.2
|
||||
|
||||
|
||||
def test_litellm_provider_body_fields_preserve_existing_extra_body():
    """Moving ``enable_thinking`` merges into an existing ``extra_body`` instead of replacing it."""
    call_args = {
        "model": "qwen3-max",
        "messages": [],
        "enable_thinking": False,
        "extra_body": {"seed": 1},
    }

    _move_litellm_provider_body_fields(SupportedLiteLLMProvider.Tongyi_Qianwen, call_args)

    # The pre-existing "seed" entry survives next to the moved field.
    assert call_args["extra_body"] == {"seed": 1, "enable_thinking": False}
    assert "enable_thinking" not in call_args
|
||||
@@ -31,8 +31,15 @@ These tests pin the whitelisting behaviour for both backends so the leak
|
||||
cannot reappear.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
if isinstance(sys.modules.get("rag.llm.chat_model"), MagicMock):
|
||||
del sys.modules["rag.llm.chat_model"]
|
||||
|
||||
from rag.llm import SupportedLiteLLMProvider
|
||||
from rag.llm.chat_model import (
|
||||
ALLOWED_GEN_CONF_KEYS,
|
||||
LITELLM_ALLOWED_GEN_CONF_KEYS,
|
||||
@@ -78,6 +85,13 @@ def test_base_drops_model_type():
|
||||
assert cleaned["temperature"] == 0.5
|
||||
|
||||
|
||||
def test_base_drops_litellm_reasoning_controls():
    """The base backend's ``_clean_conf`` removes ``thinking`` / ``enable_thinking`` and keeps ``temperature``."""
    raw_conf = {
        "temperature": 0.5,
        "thinking": {"type": "enabled"},
        "enable_thinking": True,
    }

    cleaned = _make_base()._clean_conf(raw_conf)

    assert "thinking" not in cleaned
    assert "enable_thinking" not in cleaned
    assert cleaned["temperature"] == 0.5
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stray_key", ["model_type", "llm_id", "parameter", "icon", "foo"])
|
||||
def test_litellm_drops_arbitrary_internal_keys(stray_key):
|
||||
cleaned = _make_litellm()._clean_conf({stray_key: "x", "top_p": 0.9})
|
||||
@@ -100,12 +114,20 @@ def test_litellm_preserves_known_generation_params():
|
||||
|
||||
|
||||
def test_litellm_preserves_thinking_param():
|
||||
"""``thinking`` is injected by the model-family policy for reasoning
|
||||
models and must survive the whitelist (it is a valid LiteLLM param)."""
|
||||
"""``thinking`` is a valid LiteLLM parameter even without a provider policy."""
|
||||
cleaned = _make_litellm()._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0})
|
||||
assert cleaned["thinking"] == {"type": "enabled"}
|
||||
|
||||
|
||||
def test_litellm_preserves_provider_mapped_thinking_param():
    """Provider-mapped ``thinking`` must survive the LiteLLM whitelist."""
    moonshot_model = _make_litellm(
        "kimi-k2.6-preview",
        SupportedLiteLLMProvider.Moonshot,
    )

    cleaned = moonshot_model._clean_conf({"thinking": {"type": "enabled"}, "temperature": 1.0})

    assert cleaned["thinking"] == {"type": "enabled"}
|
||||
|
||||
|
||||
def test_max_tokens_is_dropped_on_both_backends():
|
||||
assert "max_tokens" not in _make_litellm()._clean_conf({"max_tokens": 100, "temperature": 0.3})
|
||||
assert "max_tokens" not in _make_base()._clean_conf({"max_tokens": 100, "temperature": 0.3})
|
||||
|
||||
@@ -38,6 +38,7 @@ interface LlmSettingFieldItemsProps {
|
||||
| 'presence_penalty'
|
||||
| 'frequency_penalty'
|
||||
| 'max_tokens'
|
||||
| 'thinking'
|
||||
>;
|
||||
showCollapse?: boolean;
|
||||
}
|
||||
@@ -61,6 +62,7 @@ export const LlmSettingFieldSchema = {
|
||||
frequency_penalty: z.coerce.number().optional(),
|
||||
max_tokens: z.number().optional(),
|
||||
parameter: z.string().optional(),
|
||||
thinking: z.enum(['default', 'enabled', 'disabled']).optional(),
|
||||
};
|
||||
|
||||
export const LlmSettingSchema = {
|
||||
@@ -80,6 +82,7 @@ export function LlmSettingFieldItems({
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'max_tokens',
|
||||
'thinking',
|
||||
],
|
||||
llmId,
|
||||
showCollapse = false,
|
||||
@@ -249,6 +252,41 @@ export function LlmSettingFieldItems({
|
||||
}}
|
||||
></SliderInputSwitchFormField>
|
||||
)}
|
||||
{showFields.some((item) => item === 'thinking') && (
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={getFieldWithPrefix('thinking')}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex justify-between items-center">
|
||||
<FormLabel className="flex-1" tooltip={t('thinkingTip')}>
|
||||
{t('thinking')}
|
||||
</FormLabel>
|
||||
<FormControl>
|
||||
<Select
|
||||
value={field.value ?? 'default'}
|
||||
onValueChange={field.onChange}
|
||||
>
|
||||
<SelectTrigger className="flex-1 !m-0">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
<SelectItem value="default">
|
||||
{t('thinkingDefault')}
|
||||
</SelectItem>
|
||||
<SelectItem value="enabled">
|
||||
{t('thinkingEnabled')}
|
||||
</SelectItem>
|
||||
<SelectItem value="disabled">
|
||||
{t('thinkingDisabled')}
|
||||
</SelectItem>
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</FormControl>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
)}
|
||||
</section>
|
||||
</CollapseComponent>
|
||||
</div>
|
||||
|
||||
@@ -1056,6 +1056,12 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
|
||||
maxTokensTip: `The maximum context size of the model; an invalid or incorrect value will cause an error. Defaults to 512.`,
|
||||
maxTokensInvalidMessage: 'Please enter a valid number for Max tokens.',
|
||||
maxTokensMinMessage: 'Max tokens cannot be less than 0.',
|
||||
thinking: 'Thinking',
|
||||
thinkingDefault: 'System default',
|
||||
thinkingEnabled: 'Enabled',
|
||||
thinkingDisabled: 'Disabled',
|
||||
thinkingTip:
|
||||
'Only controls thinking mode for official Qwen, Kimi, and GLM model providers. System default disables Qwen thinking to avoid long-running tasks.',
|
||||
quote: 'Show quote',
|
||||
quoteTip: 'Whether to display the original text as a reference.',
|
||||
selfRag: 'Self-RAG',
|
||||
|
||||
@@ -506,6 +506,11 @@ export default {
|
||||
maxTokensTip: `模型的最大上下文大小;無效或不正確的值會導致錯誤。預設為 512。`,
|
||||
maxTokensInvalidMessage: '請輸入有效的最大標記數。',
|
||||
maxTokensMinMessage: '最大標記數不能小於 0。',
|
||||
thinking: '思考',
|
||||
thinkingDefault: '系統預設',
|
||||
thinkingEnabled: '開啟',
|
||||
thinkingDisabled: '關閉',
|
||||
thinkingTip: '僅控制官方模型提供商中的 Qwen、Kimi 和 GLM 模型思考模式。系統預設會關閉 Qwen 思考,以避免任務長時間執行。',
|
||||
quote: '顯示引文',
|
||||
quoteTip: '是否應該顯示原文出處?',
|
||||
selfRag: 'Self-RAG',
|
||||
|
||||
@@ -957,6 +957,11 @@ NER:使用 spaCy NER 和基于规则的关键词提取来抽取实体和关系
|
||||
maxTokensTip: `模型的最大上下文大小;无效或不正确的值会导致错误。默认值为 512。`,
|
||||
maxTokensInvalidMessage: '请输入有效的最大令牌数。',
|
||||
maxTokensMinMessage: '最大令牌数不能小于 0。',
|
||||
thinking: '思考',
|
||||
thinkingDefault: '系统默认',
|
||||
thinkingEnabled: '开启',
|
||||
thinkingDisabled: '关闭',
|
||||
thinkingTip: '仅控制官方模型提供商中的 Qwen、Kimi 和 GLM 模型思考模式。系统默认会关闭 Qwen 思考,以避免任务长时间运行。',
|
||||
quote: '显示引文',
|
||||
quoteTip: '是否应该显示原文出处?',
|
||||
selfRag: 'Self-RAG',
|
||||
|
||||
Reference in New Issue
Block a user