mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 18:45:38 +08:00
Fix: Chat completion generation parameter overrides (#15389)
### What problem does this PR solve?

Closes #15388.

Chat completion routes did not reliably honor per-request generation settings:

- `/api/v1/chat/completions` copied generation settings with a truthiness check, so valid zero values such as `temperature: 0`, `top_p: 0`, `frequency_penalty: 0`, `presence_penalty: 0`, and `max_tokens: 0` were dropped.
- `/api/v1/openai/{chat_id}/chat/completions` did not forward standard generation settings into the request-specific dialog LLM settings before calling `async_chat`.

This PR preserves explicitly supplied generation parameters, including zero values, and merges request-level overrides into existing dialog settings where appropriate. The supported generation parameter keys and merge behavior live in a shared REST API helper to keep both completion routes aligned.

Validation:

- `git diff --check`
- `python3 -m py_compile api/apps/restful_apis/_generation_params.py api/apps/restful_apis/chat_api.py api/apps/restful_apis/openai_api.py test/testcases/test_http_api/test_session_management/test_session_sdk_routes_unit.py`
- `uv run ruff check api/apps/restful_apis/_generation_params.py api/apps/restful_apis/chat_api.py api/apps/restful_apis/openai_api.py test/testcases/test_http_api/test_session_management/test_session_sdk_routes_unit.py`
- `ZHIPU_AI_API_KEY=dummy uv run pytest test/testcases/test_http_api/test_session_management/test_session_sdk_routes_unit.py -q -k generation_params`

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -486,6 +486,19 @@ def _load_session_module(monkeypatch):
|
||||
raise Exception("Model Name is required")
|
||||
return _MockModelConfig2(tenant_id, model_name, model_type).to_dict()
|
||||
|
||||
def _get_api_key(tenant_id: str, model_name: str):
|
||||
if not tenant_id or not model_name:
|
||||
return None
|
||||
return "fake-api-key"
|
||||
|
||||
def _split_model_name(model_name: str):
|
||||
parts = model_name.split("@")
|
||||
if len(parts) == 1:
|
||||
return parts[0], "", ""
|
||||
if len(parts) == 2:
|
||||
return parts[0], "default", parts[1]
|
||||
return parts[0], parts[1], parts[2]
|
||||
|
||||
def _get_tenant_default_model_by_type(tenant_id: str, model_type):
|
||||
# Check if tenant exists
|
||||
from api.db.services.tenant_llm_service import TenantService
|
||||
@@ -527,6 +540,8 @@ def _load_session_module(monkeypatch):
|
||||
tenant_model_service_mod.get_model_config_by_id = _get_model_config_by_id
|
||||
tenant_model_service_mod.get_model_config_from_provider_instance = _get_model_config_from_provider_instance
|
||||
tenant_model_service_mod.get_tenant_default_model_by_type = _get_tenant_default_model_by_type
|
||||
tenant_model_service_mod.get_api_key = _get_api_key
|
||||
tenant_model_service_mod.split_model_name = _split_model_name
|
||||
monkeypatch.setitem(sys.modules, "api.db.joint_services.tenant_model_service", tenant_model_service_mod)
|
||||
|
||||
agent_pkg = ModuleType("agent")
|
||||
@@ -1152,6 +1167,58 @@ def test_openai_invalid_message_content_type_unit(monkeypatch):
|
||||
assert "messages[].content must be a string or an array of content parts." in res["message"]
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_openai_nonstream_forwards_generation_params_unit(monkeypatch):
    """Non-stream OpenAI-compatible completion forwards generation params.

    The request carries zero-valued ``temperature``, ``top_p``,
    ``frequency_penalty``, ``presence_penalty`` and ``max_tokens``. The test
    checks that ``async_chat`` receives a dialog whose ``llm_setting`` holds
    those values merged over the dialog's existing settings, and that the
    dialog's original ``llm_setting`` dict is left unmodified.
    """
    module = _load_openai_api_module(monkeypatch)

    base_llm_setting = {"temperature": 0.7, "model_type": "chat"}
    dia = SimpleNamespace(
        kb_ids=[],
        llm_id="chat-model",
        tenant_id="tenant-1",
        llm_setting=base_llm_setting,
    )
    captured = {}

    monkeypatch.setattr(module, "num_tokens_from_string", lambda text: len(text or ""))
    monkeypatch.setattr(module.DialogService, "query", lambda **_kwargs: [dia])

    async def fake_async_chat(captured_dia, _msg, _stream, **_kwargs):
        # Snapshot the settings seen by the chat call so later mutation of the
        # dialog object cannot affect the assertion.
        captured["llm_setting"] = dict(captured_dia.llm_setting)
        yield {"answer": "world", "reference": {}}

    monkeypatch.setattr(module, "async_chat", fake_async_chat)
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue(
            {
                "model": "model",
                "messages": [{"role": "user", "content": "hello"}],
                "stream": False,
                "temperature": 0,
                "top_p": 0,
                "frequency_penalty": 0,
                "presence_penalty": 0,
                "max_tokens": 0,
            }
        ),
    )

    res = _run(inspect.unwrap(module.openai_chat_completions)("chat-1"))

    assert res["choices"][0]["message"]["content"] == "world"
    # Request values override "temperature"; "model_type" is kept from the
    # dialog's own settings.
    assert captured["llm_setting"] == {
        "temperature": 0,
        "model_type": "chat",
        "top_p": 0,
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "max_tokens": 0,
    }
    # The dialog's stored settings must not have been mutated in place.
    assert base_llm_setting == {"temperature": 0.7, "model_type": "chat"}
|
||||
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_agents_openai_compatibility_unit(monkeypatch):
|
||||
module = _load_agent_api_module(monkeypatch)
|
||||
@@ -2201,6 +2268,9 @@ def _load_chat_api_module(monkeypatch):
|
||||
|
||||
tenant_model_svc = ModuleType("api.db.joint_services.tenant_model_service")
|
||||
tenant_model_svc.get_tenant_default_model_by_type = lambda *_a, **_k: {}
|
||||
tenant_model_svc.get_model_config_from_provider_instance = lambda **_k: {}
|
||||
tenant_model_svc.get_api_key = lambda **_k: "fake-api-key"
|
||||
tenant_model_svc.split_model_name = lambda model_name: (model_name, "", "")
|
||||
monkeypatch.setitem(sys.modules, "api.db.joint_services.tenant_model_service", tenant_model_svc)
|
||||
|
||||
chunk_feedback_mod = ModuleType("api.db.services.chunk_feedback_service")
|
||||
@@ -2274,7 +2344,7 @@ def _load_chat_api_module(monkeypatch):
|
||||
|
||||
user_svc_mod = ModuleType("api.db.services.user_service")
|
||||
user_svc_mod.TenantService = SimpleNamespace(
|
||||
get_by_id=lambda _id: (True, SimpleNamespace(id=_id)),
|
||||
get_by_id=lambda _id: (True, SimpleNamespace(id=_id, llm_id="chat-model")),
|
||||
get_joined_tenants_by_user_id=lambda _id: [],
|
||||
)
|
||||
user_svc_mod.UserTenantService = SimpleNamespace(query=lambda **_k: [])
|
||||
@@ -2427,6 +2497,117 @@ def test_session_completion_uses_server_history_by_default(monkeypatch):
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_session_completion_preserves_zero_generation_params(monkeypatch):
    """Zero-valued generation params survive ``session_completion``.

    The request supplies ``0`` for every generation parameter. The test checks
    that all of them arrive in ``dia.llm_setting`` as seen by ``async_chat``
    and that none of them is passed to ``async_chat`` as a keyword argument.
    """
    module = _load_chat_api_module(monkeypatch)

    captured = {}

    async def _fake_async_chat(dia, _messages, stream=True, **_kwargs):
        # Record both the dialog settings and the extra keyword arguments so
        # the test can tell where each generation parameter ended up.
        captured["llm_setting"] = dict(dia.llm_setting)
        captured["kwargs"] = dict(_kwargs)
        yield {"answer": "ok", "reference": {}}

    monkeypatch.setattr(module, "async_chat", _fake_async_chat)
    monkeypatch.setattr(module, "structure_answer", lambda _conv, ans, _message_id, _session_id: ans)
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue({
            "stream": False,
            "messages": [{"role": "user", "content": "latest question"}],
            "temperature": 0,
            "top_p": 0,
            "frequency_penalty": 0,
            "presence_penalty": 0,
            "max_tokens": 0,
        }),
    )

    res = _run(inspect.unwrap(module.session_completion)())

    assert res["code"] == 0, res
    assert captured["llm_setting"] == {
        "temperature": 0,
        "top_p": 0,
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "max_tokens": 0,
    }
    # Generation params belong in llm_setting only, not in async_chat kwargs.
    assert not {
        "temperature",
        "top_p",
        "frequency_penalty",
        "presence_penalty",
        "max_tokens",
    }.intersection(captured["kwargs"])
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_session_completion_merges_generation_params_for_existing_chat(monkeypatch):
    """Request overrides are merged over an existing chat's ``llm_setting``.

    The request overrides ``temperature`` and adds ``presence_penalty`` (both
    ``0``). The test checks that ``async_chat`` sees those values alongside the
    untouched ``top_p`` and ``custom`` entries, and that the dialog's original
    ``llm_setting`` dict is left unmodified.
    """
    module = _load_chat_api_module(monkeypatch)

    base_llm_setting = {"temperature": 0.7, "top_p": 0.3, "custom": "keep"}
    dia = SimpleNamespace(
        id="chat-1",
        tenant_id="tenant-1",
        llm_id="model",
        llm_setting=base_llm_setting,
        prompt_config={"prologue": ""},
        kb_ids=[],
    )
    conv = SimpleNamespace(
        id="session-1",
        dialog_id="chat-1",
        message=[],
        reference=[],
        user_id="authenticated-user",
        name="test",
    )
    # Evaluated lazily so the dict reflects the conversation's current state
    # at the time to_dict() is called.
    conv.to_dict = lambda: {
        "id": conv.id,
        "dialog_id": conv.dialog_id,
        "message": conv.message,
        "reference": conv.reference,
        "user_id": conv.user_id,
        "name": conv.name,
    }
    captured = {}

    async def _fake_async_chat(captured_dia, _messages, stream=True, **_kwargs):
        # Snapshot the settings seen by the chat call.
        captured["llm_setting"] = dict(captured_dia.llm_setting)
        yield {"answer": "ok", "reference": {}}

    monkeypatch.setattr(module.DialogService, "get_by_id", lambda _dialog_id: (True, dia))
    monkeypatch.setattr(module.ConversationService, "get_by_id", lambda _id: (True, conv))
    monkeypatch.setattr(module.ConversationService, "update_by_id", lambda *_a, **_k: True, raising=False)
    monkeypatch.setattr(module, "async_chat", _fake_async_chat)
    monkeypatch.setattr(module, "structure_answer", lambda _conv, ans, _message_id, _session_id: ans)
    monkeypatch.setattr(
        module,
        "get_request_json",
        lambda: _AwaitableValue({
            "chat_id": "chat-1",
            "session_id": "session-1",
            "stream": False,
            "messages": [{"role": "user", "content": "latest question"}],
            "temperature": 0,
            "presence_penalty": 0,
        }),
    )

    res = _run(inspect.unwrap(module.session_completion)())

    assert res["code"] == 0, res
    # Overridden and added keys come from the request; the rest are preserved.
    assert captured["llm_setting"] == {
        "temperature": 0,
        "top_p": 0.3,
        "custom": "keep",
        "presence_penalty": 0,
    }
    # The dialog's stored settings must not have been mutated in place.
    assert base_llm_setting == {"temperature": 0.7, "top_p": 0.3, "custom": "keep"}
|
||||
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_session_completion_can_use_submitted_full_history(monkeypatch):
|
||||
"""The UI opt-in flag should preserve the previous full-history request behavior."""
|
||||
|
||||
Reference in New Issue
Block a user