mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
### What problem does this PR solve?

Fixes #15427. All LiteLLM-routed chats fail with:

- Anthropic: `litellm.BadRequestError: AnthropicException - {"type":"invalid_request_error","message":"model_type: Extra inputs are not permitted"}`
- OpenAI: `litellm.BadRequestError: OpenAIException - Unknown parameter: 'model_type'`

This is a regression from v0.25.4.

#### Root cause

A chat assistant's `llm_setting` is forwarded to the model as `gen_conf`. `llm_setting` can legitimately carry RAGFlow-internal metadata such as `model_type` (the chat REST APIs in `api/apps/restful_apis/` read it back out of `llm_setting`), so that key ends up inside `gen_conf`.

`Base._clean_conf` (OpenAI-compatible providers) already **whitelists** the keys it forwards, so direct-OpenAI providers were unaffected. `LiteLLMBase._clean_conf` only dropped `max_tokens` and passed everything else straight through to `litellm.acompletion`, which forwarded `model_type` to the upstream provider — and Anthropic / OpenAI reject it. Because both Claude and GPT route through LiteLLM, every chat broke.

#### Fix

- Extract the allowed-key set into a shared `ALLOWED_GEN_CONF_KEYS` constant and reuse it in `Base._clean_conf`.
- Apply the same whitelist in `LiteLLMBase._clean_conf`, plus the LiteLLM-specific reasoning params (`thinking`, `reasoning_effort`, `extra_body`) that the model-family policies inject for reasoning models.

This covers all four LiteLLM completion paths (`async_chat`, `async_chat_streamly`, `async_chat_with_tools`, `async_chat_streamly_with_tools`), since they all route through `_clean_conf`.

#### Tests

Adds `test/unit_test/rag/llm/test_clean_conf_whitelist.py` covering both backends: `model_type` (and other stray keys) are dropped, genuine generation params and `thinking` survive, `max_tokens` is removed, and the whitelist invariants hold.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Added test cases
59 lines
2.3 KiB
Python
59 lines
2.3 KiB
Python
#
|
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
"""
|
|
Mock heavy dependencies that graphrag/utils.py transitively imports,
|
|
so unit tests can run without infrastructure services (Redis, Elasticsearch, etc.).
|
|
"""
|
|
|
|
import sys
|
|
from unittest.mock import MagicMock
|
|
|
|
# Dotted names of the modules that are replaced with ``MagicMock`` stand-ins
# when they have not been imported yet.
_modules_to_mock = [
    "quart",
    "common.connection_utils",
    "common.settings",
    "common.doc_store",
    "common.doc_store.doc_store_base",
    "api.db.services",
    "api.db.services.task_service",
    "rag.graphrag.general.leiden",
    "rag.llm.chat_model",
    "rag.nlp",
    "rag.nlp.search",
    "rag.nlp.rag_tokenizer",
    "rag.utils.redis_conn",
]

# Register a mock only for names that are absent from ``sys.modules``; an
# entry that is already present (for example a real, previously imported
# module) is left in place.
for mod_name in _modules_to_mock:
    if mod_name not in sys.modules:
        sys.modules[mod_name] = MagicMock()

# NOTE(review): the four stub assignments below are unconditional, so they also
# overwrite these attributes on a real module that was imported earlier —
# confirm this is intended (contrast with the guarded ``Base`` stub further down).

# Ensure `from common.connection_utils import timeout` returns a no-op decorator
sys.modules["common.connection_utils"].timeout = lambda *a, **kw: (lambda fn: fn)
# Cancellation check that always answers False.
sys.modules["api.db.services.task_service"].has_canceled = lambda *_a, **_kw: False
# Leiden entry points reduced to constants: an empty dict and None.
sys.modules["rag.graphrag.general.leiden"].run = lambda *_a, **_kw: {}
sys.modules["rag.graphrag.general.leiden"].add_community_info2graph = lambda *_a, **_kw: None

# Only stub ``Base`` when we actually mocked chat_model. This conftest mutates
# the global sys.modules at import time, and rag/graphrag/ is collected before
# rag/llm/. If an earlier test package already imported the real chat_model,
# unconditionally assigning ``Base = object`` clobbered the genuine class and
# leaked into the rag/llm unit tests that import it (AttributeError: no
# attribute '_clean_conf'). graphrag only uses ``Base`` as a type alias, so the
# real class works just as well when it is already loaded.
if isinstance(sys.modules["rag.llm.chat_model"], MagicMock):
    sys.modules["rag.llm.chat_model"].Base = object
|