feat: pass chat_template_kwargs through agent chat completion (#14542)

### What problem does this PR solve?

The agent API currently does not pass `chat_template_kwargs` to the
underlying LLM call path, so clients cannot control template-level model
behavior (such as thinking-mode toggles) when invoking
`/agents/chat/completion`. This PR adds passthrough support for
`chat_template_kwargs` across agent execution flows (session and
non-session, streaming and non-streaming): the value is stored in the
canvas runtime state and forwarded from there into the LLM invocation
kwargs. This addresses the feature gap raised in [Issue
#14182](https://github.com/infiniflow/ragflow/issues/14182).

Closes #14182 

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Full Stack Developer
2026-05-22 02:15:49 -05:00
committed by GitHub
parent c33d0b8081
commit 8f90740d2e
5 changed files with 49 additions and 7 deletions

View File

@@ -402,7 +402,7 @@ class Canvas(Graph):
break
for k in kwargs.keys():
if k in ["query", "user_id", "files"] and kwargs[k]:
if k in ["query", "user_id", "files", "chat_template_kwargs"] and kwargs[k]:
if k == "files":
self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k], layout_recognize)
else:

View File

@@ -345,6 +345,8 @@ class LLM(ComponentBase):
return re.sub(r"(<think>|</think>)", "", delta_ans)
stream_kwargs = {"images": self.imgs} if self.imgs else {}
extra_chat_kwargs = self._get_chat_template_kwargs()
stream_kwargs.update(extra_chat_kwargs)
async for ans in self.chat_mdl.async_chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), **stream_kwargs):
if self.check_if_canceled("LLM streaming"):
return
@@ -375,6 +377,7 @@ class LLM(ComponentBase):
return re.sub(r"```\n*$", "", ans, flags=re.DOTALL)
prompt, msg, _ = self._prepare_prompt_variables()
extra_chat_kwargs = self._get_chat_template_kwargs()
error: str = ""
output_structure = None
try:
@@ -393,7 +396,7 @@ class LLM(ComponentBase):
int(self.chat_mdl.max_length * 0.97),
)
error = ""
ans = await self._generate_async(msg_fit)
ans = await self._generate_async(msg_fit, **extra_chat_kwargs)
msg_fit.pop(0)
if ans.find("**ERROR**") >= 0:
logging.error(f"LLM response error: {ans}")
@@ -426,7 +429,7 @@ class LLM(ComponentBase):
[{"role": "system", "content": prompt}, *deepcopy(msg)], int(self.chat_mdl.max_length * 0.97)
)
error = ""
ans = await self._generate_async(msg_fit)
ans = await self._generate_async(msg_fit, **extra_chat_kwargs)
msg_fit.pop(0)
if ans.find("**ERROR**") >= 0:
logging.error(f"LLM response error: {ans}")
@@ -445,6 +448,24 @@ class LLM(ComponentBase):
def _invoke(self, **kwargs):
    """Synchronous entry point: run ``_invoke_async`` to completion.

    Every keyword argument is forwarded unchanged to ``_invoke_async`` and
    that coroutine's result is returned to the caller.
    """
    pending = self._invoke_async(**kwargs)
    return asyncio.run(pending)
def _get_chat_template_kwargs(self) -> dict[str, Any]:
    """Build the extra LLM call kwargs that carry ``chat_template_kwargs``.

    The value is read from the canvas globals under the key
    ``sys.chat_template_kwargs``. A dict is accepted directly; a string is
    parsed with ``json_repair.loads`` and must yield a dict. Anything else
    is ignored with a warning.

    Returns:
        ``{"chat_template_kwargs": <dict>}`` when a usable value is present,
        otherwise an empty dict, so the result can always be expanded with
        ``**`` or merged with ``dict.update``.
    """
    raw = self._canvas.globals.get("sys.chat_template_kwargs")
    if raw is None:
        return {}

    # The API should pass this as a JSON object, but accept a JSON string for compatibility.
    if isinstance(raw, str):
        try:
            raw = json_repair.loads(raw)
        except Exception:
            # Best effort: a malformed optional setting must not abort the LLM call.
            logging.warning("Ignore invalid sys.chat_template_kwargs: expected JSON object or JSON string object.")
            return {}

    if not isinstance(raw, dict):
        logging.warning("Ignore invalid sys.chat_template_kwargs type: %s", type(raw).__name__)
        return {}

    # Return a shallow copy so a callee that mutates the kwargs it receives
    # cannot alter the value kept in the canvas globals for later calls.
    return {"chat_template_kwargs": dict(raw)}
async def add_memory(self, user:str, assist:str, func_name: str, params: dict, results: str, user_defined_prompt:dict={}):
summ = await tool_call_summary(self.chat_mdl, func_name, params, results, user_defined_prompt)
logging.info(f"[MEMORY]: {summ}")