mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix: add legacy chat/completions mode (#16014)
### What problem does this PR solve?

Adds a legacy mode for /chat/completions that restores v0.23.0-style output by converting start_to_think/end_to_think events back into raw <think></think> markers and streaming cumulative answer text.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -1229,6 +1229,11 @@ async def session_completion(chat_id_in_arg=""):
|
||||
dia.llm_id = tenant_info.llm_id
|
||||
merge_generation_config(dia, chat_model_config)
|
||||
|
||||
legacy = _get_bool_request_flag(
|
||||
req,
|
||||
"legacy",
|
||||
default=False,
|
||||
)
|
||||
stream_mode = req.pop("stream", True)
|
||||
|
||||
def _format_answer(ans):
|
||||
@@ -1242,10 +1247,53 @@ async def session_completion(chat_id_in_arg=""):
|
||||
"""Yield SSE-formatted chunks from the async chat generator."""
|
||||
nonlocal dia, msg, req, conv
|
||||
try:
|
||||
async for ans in async_chat(dia, msg, True, session_id=session_id, **req):
|
||||
ans = _format_answer(ans)
|
||||
payload = _sanitize_json_floats({"code": 0, "message": "", "data": ans})
|
||||
yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
|
||||
if legacy:
|
||||
# v0.23.0-style streaming: emit accumulated answer text and
|
||||
# reconstruct raw <think>...</think> markers from the newer
|
||||
# start_to_think/end_to_think events.
|
||||
legacy_answer = ""
|
||||
final_answer = None
|
||||
async for ans in async_chat(dia, msg, True, session_id=session_id, **req):
|
||||
ans = _format_answer(ans)
|
||||
if ans.get("final"):
|
||||
final_answer = ans
|
||||
continue
|
||||
if ans.get("start_to_think"):
|
||||
legacy_answer += "<think>"
|
||||
legacy_chunk = {**ans, "answer": legacy_answer}
|
||||
legacy_chunk.pop("start_to_think", None)
|
||||
legacy_chunk.pop("end_to_think", None)
|
||||
payload = _sanitize_json_floats({"code": 0, "message": "", "data": legacy_chunk})
|
||||
yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
|
||||
continue
|
||||
if ans.get("end_to_think"):
|
||||
legacy_answer += "</think>"
|
||||
legacy_chunk = {**ans, "answer": legacy_answer}
|
||||
legacy_chunk.pop("start_to_think", None)
|
||||
legacy_chunk.pop("end_to_think", None)
|
||||
payload = _sanitize_json_floats({"code": 0, "message": "", "data": legacy_chunk})
|
||||
yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
|
||||
continue
|
||||
delta = ans.get("answer") or ""
|
||||
if not delta:
|
||||
continue
|
||||
legacy_answer += delta
|
||||
legacy_chunk = {**ans, "answer": legacy_answer}
|
||||
legacy_chunk.pop("start_to_think", None)
|
||||
legacy_chunk.pop("end_to_think", None)
|
||||
payload = _sanitize_json_floats({"code": 0, "message": "", "data": legacy_chunk})
|
||||
yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
|
||||
if final_answer is not None:
|
||||
final_chunk = {**final_answer, "answer": final_answer.get("answer") or legacy_answer}
|
||||
final_chunk.pop("start_to_think", None)
|
||||
final_chunk.pop("end_to_think", None)
|
||||
payload = _sanitize_json_floats({"code": 0, "message": "", "data": final_chunk})
|
||||
yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
|
||||
else:
|
||||
async for ans in async_chat(dia, msg, True, session_id=session_id, **req):
|
||||
ans = _format_answer(ans)
|
||||
payload = _sanitize_json_floats({"code": 0, "message": "", "data": ans})
|
||||
yield "data:" + json.dumps(payload, ensure_ascii=False) + "\n\n"
|
||||
if conv is not None:
|
||||
await thread_pool_exec(ConversationService.update_by_id, conv.id, conv.to_dict())
|
||||
except Exception as ex:
|
||||
|
||||
@@ -900,6 +900,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
|
||||
final = await decorate_answer(_extract_visible_answer(thought + full_answer))
|
||||
final["final"] = True
|
||||
final["audio_binary"] = None
|
||||
final["answer"] = ""
|
||||
yield final
|
||||
else:
|
||||
if llm_model_config["model_type"] == "chat":
|
||||
@@ -1715,6 +1716,7 @@ async def async_ask(question, kb_ids, tenant_id, chat_llm_name=None, search_conf
|
||||
full_answer = last_state.full_text if last_state else ""
|
||||
final = await decorate_answer(_extract_visible_answer(full_answer))
|
||||
final["final"] = True
|
||||
final["answer"] = ""
|
||||
yield final
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user