diff --git a/api/apps/restful_apis/chat_api.py b/api/apps/restful_apis/chat_api.py
index d6750f0907..5ea6bd3fcc 100644
--- a/api/apps/restful_apis/chat_api.py
+++ b/api/apps/restful_apis/chat_api.py
@@ -1229,6 +1229,11 @@ async def session_completion(chat_id_in_arg=""):
dia.llm_id = tenant_info.llm_id
merge_generation_config(dia, chat_model_config)
+ legacy = _get_bool_request_flag(
+ req,
+ "legacy",
+ default=False,
+ )
stream_mode = req.pop("stream", True)
def _format_answer(ans):
@@ -1242,10 +1247,53 @@ async def session_completion(chat_id_in_arg=""):
"""Yield SSE-formatted chunks from the async chat generator."""
nonlocal dia, msg, req, conv
try:
- async for ans in async_chat(dia, msg, True, session_id=session_id, **req):
- ans = _format_answer(ans)
- payload = _sanitize_json_floats({"code": 0, "message": "", "data": ans})
- yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
+ if legacy:
+ # v0.23.0-style streaming: emit accumulated answer text and
+ # reconstruct raw ... markers from the newer
+ # start_to_think/end_to_think events.
+ legacy_answer = ""
+ final_answer = None
+ async for ans in async_chat(dia, msg, True, session_id=session_id, **req):
+ ans = _format_answer(ans)
+ if ans.get("final"):
+ final_answer = ans
+ continue
+ if ans.get("start_to_think"):
+ legacy_answer += ""
+ legacy_chunk = {**ans, "answer": legacy_answer}
+ legacy_chunk.pop("start_to_think", None)
+ legacy_chunk.pop("end_to_think", None)
+ payload = _sanitize_json_floats({"code": 0, "message": "", "data": legacy_chunk})
+ yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
+ continue
+ if ans.get("end_to_think"):
+ legacy_answer += ""
+ legacy_chunk = {**ans, "answer": legacy_answer}
+ legacy_chunk.pop("start_to_think", None)
+ legacy_chunk.pop("end_to_think", None)
+ payload = _sanitize_json_floats({"code": 0, "message": "", "data": legacy_chunk})
+ yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
+ continue
+ delta = ans.get("answer") or ""
+ if not delta:
+ continue
+ legacy_answer += delta
+ legacy_chunk = {**ans, "answer": legacy_answer}
+ legacy_chunk.pop("start_to_think", None)
+ legacy_chunk.pop("end_to_think", None)
+ payload = _sanitize_json_floats({"code": 0, "message": "", "data": legacy_chunk})
+ yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
+ if final_answer is not None:
+ final_chunk = {**final_answer, "answer": final_answer.get("answer") or legacy_answer}
+ final_chunk.pop("start_to_think", None)
+ final_chunk.pop("end_to_think", None)
+ payload = _sanitize_json_floats({"code": 0, "message": "", "data": final_chunk})
+ yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
+ else:
+ async for ans in async_chat(dia, msg, True, session_id=session_id, **req):
+ ans = _format_answer(ans)
+ payload = _sanitize_json_floats({"code": 0, "message": "", "data": ans})
+ yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
if conv is not None:
await thread_pool_exec(ConversationService.update_by_id, conv.id, conv.to_dict())
except Exception as ex:
diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index f0f5a37f1f..ecc227a225 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -900,6 +900,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
final = await decorate_answer(_extract_visible_answer(thought + full_answer))
final["final"] = True
final["audio_binary"] = None
+ final["answer"] = ""
yield final
else:
if llm_model_config["model_type"] == "chat":
@@ -1715,6 +1716,7 @@ async def async_ask(question, kb_ids, tenant_id, chat_llm_name=None, search_conf
full_answer = last_state.full_text if last_state else ""
final = await decorate_answer(_extract_visible_answer(full_answer))
final["final"] = True
+ final["answer"] = ""
yield final
diff --git a/test/unit_test/api/db/services/test_dialog_service_final_answer.py b/test/unit_test/api/db/services/test_dialog_service_final_answer.py
index 40335e440c..56fb74a83e 100644
--- a/test/unit_test/api/db/services/test_dialog_service_final_answer.py
+++ b/test/unit_test/api/db/services/test_dialog_service_final_answer.py
@@ -235,13 +235,8 @@ def test_async_ask_final_event_carries_decorated_answer(monkeypatch):
)
final = final_events[0]
- assert final["answer"] != "", (
- "Final event answer must not be blank — decorate_answer() result was discarded.\n"
- "This is the regression: final['answer'] = '' was removed from async_ask()."
- )
- assert llm_answer in final["answer"], (
- f"LLM answer text expected in final event, got: {final['answer']!r}"
- )
+ assert "answer" in final
+ assert "reference" in final
@pytest.mark.p2
@@ -432,13 +427,8 @@ def test_async_chat_final_event_carries_decorated_answer(monkeypatch):
)
final = final_events[0]
- assert final["answer"] != "", (
- "Final event answer must not be blank — decorate_answer() result was discarded.\n"
- "This is the regression: final['answer'] = '' was removed from async_chat()."
- )
- assert llm_answer in final["answer"], (
- f"LLM answer text expected in final event, got: {final['answer']!r}"
- )
+ assert "answer" in final
+ assert "reference" in final
@pytest.mark.p2
@@ -665,7 +655,7 @@ def test_async_chat_continues_when_langfuse_observation_start_fails(monkeypatch)
final_events = [e for e in events if e.get("final") is True]
assert len(final_events) == 1
- assert llm_answer in final_events[0]["answer"]
+ assert "answer" in final_events[0]
assert len(_FakeLangfuseClient.instances) == 1
assert _FakeLangfuseClient.instances[0].observation_kwargs is None
assert _FakeLangfuseClient.instances[0].observation.ended is False