From e6e80041f549582fd0164afcd5d52c91b3fe861f Mon Sep 17 00:00:00 2001
From: buua436 <sz_buua@foxmail.com>
Date: Tue, 28 Apr 2026 17:09:08 +0800
Subject: [PATCH] Fix: agent toolcall null response & schema validation &
 DeepSeek think history (#14425)

### What problem does this PR solve?
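This PR fixes three issues in agent tool calling:

- Null tool-call response: when a tool's `invoke` returns `None`, the call
  session now falls back to the tool's `output()` (using its `content` entry
  when the output is a dict with a non-empty `content`) and logs a warning.
- Schema validation: `Agent.get_meta()` used to overwrite the `user_prompt`
  property schema with the prompt string itself, which is not a valid JSON
  schema node; the prompt is now stored as that property's `default`.
- DeepSeek think history: for the DeepSeek provider, the `reasoning_content`
  returned alongside tool calls is now attached to the assistant tool-call
  message appended to the history, in both the streaming and non-streaming
  tool loops.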

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 agent/component/agent_with_tools.py |  3 +-
 agent/tools/base.py                 | 13 ++++++
 rag/llm/chat_model.py               | 68 +++++++++++++++++++----------
 3 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/agent/component/agent_with_tools.py b/agent/component/agent_with_tools.py
index 56f23afe35..d59d8eb804 100644
--- a/agent/component/agent_with_tools.py
+++ b/agent/component/agent_with_tools.py
@@ -145,7 +145,8 @@ class Agent(LLM, ToolBase):
         self._param.function_name = self._id.split("-->")[-1]
         m = super().get_meta()
         if hasattr(self._param, "user_prompt") and self._param.user_prompt:
-            m["function"]["parameters"]["properties"]["user_prompt"] = self._param.user_prompt
+            # Keep the JSON schema valid; user_prompt is a string field, not a schema node.
+            m["function"]["parameters"]["properties"]["user_prompt"]["default"] = self._param.user_prompt
         return m
 
     def get_input_form(self) -> dict[str, dict]:
diff --git a/agent/tools/base.py b/agent/tools/base.py
index f5a42de4d1..194b47fcee 100644
--- a/agent/tools/base.py
+++ b/agent/tools/base.py
@@ -67,6 +67,19 @@ class LLMToolPluginCallSession(ToolCallSession):
         else:
             resp = await thread_pool_exec(tool_obj.invoke, **arguments)
 
+        if resp is None and hasattr(tool_obj, "output") and callable(tool_obj.output):
+            try:
+                fallback_output = tool_obj.output()
+                if isinstance(fallback_output, dict) and fallback_output.get("content") not in (None, ""):
+                    resp = fallback_output["content"]
+                elif fallback_output not in (None, ""):
+                    resp = fallback_output
+                else:
+                    resp = fallback_output
+                logging.warning(f"[ToolCall] resp is None, fallback to output name={name} output_keys={list(fallback_output.keys()) if isinstance(fallback_output, dict) else type(fallback_output).__name__}")
+            except Exception as e:
+                logging.warning(f"[ToolCall] resp is None and output fallback failed name={name} err={e}")
+
         elapsed = timer() - st
         logging.info(f"[ToolCall] done name={name} elapsed={elapsed:.2f}s result={str(resp)[:200]}")
         self.callback(name, arguments, resp, elapsed_time=elapsed)
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index a58e8450c0..3aa13d03d8 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1322,6 +1322,9 @@ class LiteLLMBase(ABC):
         gen_conf.pop("max_tokens", None)
         return gen_conf
 
+    def _need_reasoning_content_back(self) -> bool:
+        return self.provider == SupportedLiteLLMProvider.DeepSeek
+
     async def async_chat(self, system, history, gen_conf, **kwargs):
         hist = list(history) if history else []
         if system:
@@ -1456,23 +1459,24 @@ class LiteLLMBase(ABC):
     def _verbose_tool_use(self, name, args, res):
         return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
 
-    def _append_history(self, hist, tool_call, tool_res):
-        hist.append(
-            {
-                "role": "assistant",
-                "tool_calls": [
-                    {
-                        "index": tool_call.index,
-                        "id": tool_call.id,
-                        "function": {
-                            "name": tool_call.function.name,
-                            "arguments": tool_call.function.arguments,
-                        },
-                        "type": "function",
+    def _append_history(self, hist, tool_call, tool_res, reasoning_content=None):
+        assistant_msg = {
+            "role": "assistant",
+            "tool_calls": [
+                {
+                    "index": tool_call.index,
+                    "id": tool_call.id,
+                    "function": {
+                        "name": tool_call.function.name,
+                        "arguments": tool_call.function.arguments,
                     },
-                ],
-            }
-        )
+                    "type": "function",
+                },
+            ],
+        }
+        if reasoning_content:
+            assistant_msg["reasoning_content"] = reasoning_content
+        hist.append(assistant_msg)
         try:
             if isinstance(tool_res, dict):
                 tool_res = json.dumps(tool_res, ensure_ascii=False)
@@ -1480,13 +1484,13 @@ class LiteLLMBase(ABC):
             hist.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(tool_res)})
         return hist
 
-    def _append_history_batch(self, hist, results):
+    def _append_history_batch(self, hist, results, reasoning_content=None):
         """
         Append a batch of tool calls to history following the OpenAI protocol:
         one assistant message containing all tool_calls, followed by one tool message per call.
         results: list of (tool_call, name, args, result, error)
         """
-        hist.append({
+        assistant_msg = {
             "role": "assistant",
             "tool_calls": [
                 {
@@ -1497,7 +1501,10 @@ class LiteLLMBase(ABC):
                 }
                 for tc, _, _, _, _ in results
             ],
-        })
+        }
+        if reasoning_content:
+            assistant_msg["reasoning_content"] = reasoning_content
+        hist.append(assistant_msg)
         for tc, _, _, result, err in results:
             if err:
                 content = str(err)
@@ -1542,11 +1549,13 @@ class LiteLLMBase(ABC):
                         raise Exception(f"500 response structure error. Response: {response}")
 
                     message = response.choices[0].message
+                    reasoning_content = None
+                    if self._need_reasoning_content_back():
+                        reasoning_content = getattr(message, "reasoning_content", None) or getattr(message, "reasoning", None)
 
                     if not hasattr(message, "tool_calls") or not message.tool_calls:
-                        _reasoning = getattr(message, "reasoning_content", None) or getattr(message, "reasoning", None)
-                        if _reasoning:
-                            ans += f"<think>{_reasoning}</think>"
+                        if reasoning_content:
+                            ans += f"<think>{reasoning_content}</think>"
                         ans += message.content or ""
                         if response.choices[0].finish_reason == "length":
                             ans = self._length_stop(ans)
@@ -1567,7 +1576,11 @@ class LiteLLMBase(ABC):
 
                     logging.info(f"Response tool_calls={message.tool_calls}")
                     results = await asyncio.gather(*[_exec_tool(tc) for tc in message.tool_calls])
-                    history = self._append_history_batch(history, results)
+                    history = self._append_history_batch(
+                        history,
+                        results,
+                        reasoning_content=reasoning_content if self._need_reasoning_content_back() else None,
+                    )
                     for tc, name, args, result, err in results:
                         ans += self._verbose_tool_use(name, args, err if err else result)
 
@@ -1600,6 +1613,7 @@ class LiteLLMBase(ABC):
             try:
                 for _round in range(self.max_rounds + 1):
                     reasoning_start = False
+                    reasoning_content = ""
                     logging.info(f"[ToolLoop] round={_round} model={self.model_name} tools={[t['function']['name'] for t in tools]}")
 
                     completion_args = self._construct_completion_args(history=history, stream=True, tools=True, **gen_conf)
@@ -1634,6 +1648,8 @@ class LiteLLMBase(ABC):
 
                         _reasoning = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
                         if _reasoning:
+                            if self._need_reasoning_content_back():
+                                reasoning_content += _reasoning
                             ans = ""
                             if not reasoning_start:
                                 reasoning_start = True
@@ -1682,7 +1698,11 @@ class LiteLLMBase(ABC):
                             args = {}
                         yield self._verbose_tool_use(tc.function.name, args, "Begin to call...")
                     results = await asyncio.gather(*[_exec_tool(tc) for tc in tcs])
-                    history = self._append_history_batch(history, results)
+                    history = self._append_history_batch(
+                        history,
+                        results,
+                        reasoning_content=reasoning_content if self._need_reasoning_content_back() else None,
+                    )
                     for tc, name, args, result, err in results:
                         yield self._verbose_tool_use(name, args, err if err else result)