feat: pass chat_template_kwargs through agent chat completion (#14542)

### What problem does this PR solve? The agent API currently does not pass chat_template_kwargs to the underlying LLM call path, so clients cannot control template-level model behavior (such as thinking-mode toggles) when invoking /agents/chat/completion. This PR adds passthrough support for chat_template_kwargs across agent execution flows (session and non-session, streaming and non-streaming) by propagating it through canvas runtime state and into LLM invocation kwargs. This addresses the feature gap raised in [Issue #14182](https://github.com/infiniflow/ragflow/issues/14182). Closes #14182 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
2026-06-29 15:31:05 +08:00 · 2026-05-22 02:15:49 -05:00
parent c33d0b8081
commit 8f90740d2e
5 changed files with 49 additions and 7 deletions
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -402,7 +402,7 @@ class Canvas(Graph):
                break

        for k in kwargs.keys():
-            if k in ["query", "user_id", "files"] and kwargs[k]:
+            if k in ["query", "user_id", "files", "chat_template_kwargs"] and kwargs[k]:
                if k == "files":
                    self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k], layout_recognize)
                else:
--- a/agent/component/llm.py
+++ b/agent/component/llm.py
@@ -345,6 +345,8 @@ class LLM(ComponentBase):
            return re.sub(r"(<think>|</think>)", "", delta_ans)

        stream_kwargs = {"images": self.imgs} if self.imgs else {}
+        extra_chat_kwargs = self._get_chat_template_kwargs()
+        stream_kwargs.update(extra_chat_kwargs)
        async for ans in self.chat_mdl.async_chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), **stream_kwargs):
            if self.check_if_canceled("LLM streaming"):
                return
@@ -375,6 +377,7 @@ class LLM(ComponentBase):
            return re.sub(r"```\n*$", "", ans, flags=re.DOTALL)

        prompt, msg, _ = self._prepare_prompt_variables()
+        extra_chat_kwargs = self._get_chat_template_kwargs()
        error: str = ""
        output_structure = None
        try:
@@ -393,7 +396,7 @@ class LLM(ComponentBase):
                    int(self.chat_mdl.max_length * 0.97),
                )
                error = ""
-                ans = await self._generate_async(msg_fit)
+                ans = await self._generate_async(msg_fit, **extra_chat_kwargs)
                msg_fit.pop(0)
                if ans.find("**ERROR**") >= 0:
                    logging.error(f"LLM response error: {ans}")
@@ -426,7 +429,7 @@ class LLM(ComponentBase):
                [{"role": "system", "content": prompt}, *deepcopy(msg)], int(self.chat_mdl.max_length * 0.97)
            )
            error = ""
-            ans = await self._generate_async(msg_fit)
+            ans = await self._generate_async(msg_fit, **extra_chat_kwargs)
            msg_fit.pop(0)
            if ans.find("**ERROR**") >= 0:
                logging.error(f"LLM response error: {ans}")
@@ -445,6 +448,24 @@ class LLM(ComponentBase):
    def _invoke(self, **kwargs):
        return asyncio.run(self._invoke_async(**kwargs))

+    def _get_chat_template_kwargs(self) -> dict[str, Any]:
+        chat_template_kwargs = self._canvas.globals.get("sys.chat_template_kwargs")
+        if chat_template_kwargs is None:
+            return {}
+
+        # The API should pass this as a JSON object, but accept a JSON string for compatibility.
+        if isinstance(chat_template_kwargs, str):
+            try:
+                chat_template_kwargs = json_repair.loads(chat_template_kwargs)
+            except Exception:
+                logging.warning("Ignore invalid sys.chat_template_kwargs: expected JSON object or JSON string object.")
+                return {}
+
+        if not isinstance(chat_template_kwargs, dict):
+            logging.warning("Ignore invalid sys.chat_template_kwargs type: %s", type(chat_template_kwargs).__name__)
+            return {}
+        return {"chat_template_kwargs": chat_template_kwargs}
+
    async def add_memory(self, user:str, assist:str, func_name: str, params: dict, results: str, user_defined_prompt:dict={}):
        summ = await tool_call_summary(self.chat_mdl, func_name, params, results, user_defined_prompt)
        logging.info(f"[MEMORY]: {summ}")