From 8269fa01b41041b7e4f41c0630800e35383a2fef Mon Sep 17 00:00:00 2001 From: euvre <93761161+euvre@users.noreply.github.com> Date: Tue, 5 May 2026 23:39:40 -0700 Subject: [PATCH] Fix AttributeError when appending non-streaming tool calls to chat history in Agentic Agent (#14456) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Fix #14340 ## Problem Description When using an **Agentic Agent** (not Workflow) with one or more Retrieval tools (e.g., Dataset Retrieval + Memory Retrieval), the agent silently returns an empty response (`agent_response: ""`) after hanging for several minutes. The server logs show: ``` AttributeError: 'ChatCompletionMessageToolCall' object has no attribute 'index' ``` This error propagates as a `GENERIC_ERROR`, causing the canvas to return an empty response. The subsequent Memory save task then receives the empty `agent_response` and logs: ``` Document for referred_document_id XXXX not found ``` ## Reproduction Steps 1. Set `DOC_ENGINE=infinity` (or `elasticsearch` — the engine itself is not the root cause). 2. Create a blank **Agentic Agent** (not a Workflow). 3. Add **two Retrieval tools** to the Agent node: - `Retrieval_DS` → Dataset (Knowledge Base) - `Retrieval_Mem` → Memory component 4. Add a **Message** node with **Save to Memory** enabled. 5. Launch the agent and send any message (e.g., "hola"). 6. The agent hangs and returns an empty response. ## Root Cause Analysis The crash occurs in `_append_history` and `_append_history_batch` inside `rag/llm/chat_model.py`. These methods directly access `.index` on tool call objects: ```python # _append_history_batch { "index": tc.index, # <-- crashes here ... } ``` However, **non-streaming** LLM responses (`stream=False`) return `ChatCompletionMessageToolCall` objects, which **do not have an `index` field** according to the OpenAI API specification. 
The `index` field only exists on `ChoiceDeltaToolCall` objects returned in **streaming** responses (`stream=True`). When the agentic agent triggers an internal `full_question` call (used to compress multi-turn conversation history), the request is incorrectly routed through `async_chat_with_tools` because `is_tools=True` is set at the `LLMBundle` level. If the LLM decides to emit `tool_calls` during this auxiliary request, the code enters the non-streaming tool loop and crashes when trying to append history. ## Fix Replaced all direct `.index` accesses with `getattr(..., "index", None)` for safe, backward-compatible access: | Method | File | Line | Change | |--------|------|------|--------| | `Base._append_history` | `rag/llm/chat_model.py` | ~L304 | `tool_call.index` → `getattr(tool_call, "index", None)` | | `Base._append_history_batch` | `rag/llm/chat_model.py` | ~L332 | `tc.index` → `getattr(tc, "index", None)` | | `LiteLLMBase._append_history` | `rag/llm/chat_model.py` | ~L1477 | `tool_call.index` → `getattr(tool_call, "index", None)` | | `LiteLLMBase._append_history_batch` | `rag/llm/chat_model.py` | ~L1507 | `tc.index` → `getattr(tc, "index", None)` | ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Signed-off-by: noob --- rag/llm/chat_model.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index d31587e33c..b8a4a5a0de 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -301,7 +301,7 @@ class Base(ABC): "role": "assistant", "tool_calls": [ { - "index": tool_call.index, + "index": getattr(tool_call, "index", None), "id": tool_call.id, "function": { "name": tool_call.function.name, @@ -325,18 +325,20 @@ class Base(ABC): one assistant message containing all tool_calls, followed by one tool message per call. 
results: list of (tool_call, name, args, result, error) """ - hist.append({ - "role": "assistant", - "tool_calls": [ - { - "index": tc.index, - "id": tc.id, - "function": {"name": tc.function.name, "arguments": tc.function.arguments}, - "type": "function", - } - for tc, _, _, _, _ in results - ], - }) + hist.append( + { + "role": "assistant", + "tool_calls": [ + { + "index": getattr(tc, "index", None), + "id": tc.id, + "function": {"name": tc.function.name, "arguments": tc.function.arguments}, + "type": "function", + } + for tc, _, _, _, _ in results + ], + } + ) for tc, _, _, result, err in results: if err: content = str(err) @@ -1474,7 +1476,7 @@ class LiteLLMBase(ABC): "role": "assistant", "tool_calls": [ { - "index": tool_call.index, + "index": getattr(tool_call, "index", None), "id": tool_call.id, "function": { "name": tool_call.function.name, @@ -1504,7 +1506,7 @@ class LiteLLMBase(ABC): "role": "assistant", "tool_calls": [ { - "index": tc.index, + "index": getattr(tc, "index", None), "id": tc.id, "function": {"name": tc.function.name, "arguments": tc.function.arguments}, "type": "function", @@ -1850,17 +1852,19 @@ class LiteLLMBase(ABC): completion_args["extra_headers"] = extra_headers return completion_args + class RAGconChat(Base): """ RAGcon Chat Provider - routes through LiteLLM proxy - + All model types are handled through a unified LiteLLM endpoint. Default Base URL: https://connect.ragcon.com/v1 """ + _FACTORY_NAME = "RAGcon" - + def __init__(self, key, model_name, base_url=None, **kwargs): if not base_url: base_url = "https://connect.ragcon.com/v1" - + super().__init__(key, model_name, base_url, **kwargs)