From 8269fa01b41041b7e4f41c0630800e35383a2fef Mon Sep 17 00:00:00 2001
From: euvre <93761161+euvre@users.noreply.github.com>
Date: Tue, 5 May 2026 23:39:40 -0700
Subject: [PATCH] Fix AttributeError when appending non-streaming tool calls to
 chat history in Agentic Agent (#14456)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What problem does this PR solve?

Fix #14340

## Problem Description

When using an **Agentic Agent** (not Workflow) with one or more
Retrieval tools (e.g., Dataset Retrieval + Memory Retrieval), the agent
silently returns an empty response (`agent_response: ""`) after hanging
for several minutes. The server logs show:

```
AttributeError: 'ChatCompletionMessageToolCall' object has no attribute 'index'
```

This error propagates as a `GENERIC_ERROR`, causing the canvas to return
an empty response. The subsequent Memory save task then receives the
empty `agent_response` and logs:

```
Document for referred_document_id XXXX not found
```

## Reproduction Steps

1. Set `DOC_ENGINE=infinity` (or `elasticsearch` — the engine itself is
not the root cause).
2. Create a blank **Agentic Agent** (not a Workflow).
3. Add **two Retrieval tools** to the Agent node:
   - `Retrieval_DS` → Dataset (Knowledge Base)
   - `Retrieval_Mem` → Memory component
4. Add a **Message** node with **Save to Memory** enabled.
5. Launch the agent and send any message (e.g., "hola").
6. The agent hangs and returns an empty response.

## Root Cause Analysis

The crash occurs in `_append_history` and `_append_history_batch` inside
`rag/llm/chat_model.py`. These methods directly access `.index` on tool
call objects:

```python
# _append_history_batch
{
    "index": tc.index,   # <-- crashes here
    ...
}
```

However, **non-streaming** LLM responses (`stream=False`) return
`ChatCompletionMessageToolCall` objects, which **do not have an `index`
field** according to the OpenAI API specification. The `index` field
only exists on `ChoiceDeltaToolCall` objects returned in **streaming**
responses (`stream=True`).

When the agentic agent triggers an internal `full_question` call (used
to compress multi-turn conversation history), the request is incorrectly
routed through `async_chat_with_tools` because `is_tools=True` is set at
the `LLMBundle` level. If the LLM decides to emit `tool_calls` during
this auxiliary request, the code enters the non-streaming tool loop and
crashes when trying to append history.

## Fix

Replaced all direct `.index` accesses with `getattr(..., "index", None)`
for safe, backward-compatible access:

| Method | File | Line | Change |
|--------|------|------|--------|
| `_append_history` | `rag/llm/chat_model.py` | ~L304 |
`tool_call.index` → `getattr(tool_call, "index", None)` |
| `_append_history_batch` | `rag/llm/chat_model.py` | ~L332 | `tc.index`
→ `getattr(tc, "index", None)` |
| `_append_history` | `rag/llm/chat_model.py` | ~L1467 |
`tool_call.index` → `getattr(tool_call, "index", None)` |
| `_append_history_batch` | `rag/llm/chat_model.py` | ~L1496 |
`tc.index` → `getattr(tc, "index", None)` |

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Signed-off-by: noob <yixiao121314@outlook.com>
---
 rag/llm/chat_model.py | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index d31587e33c..b8a4a5a0de 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -301,7 +301,7 @@ class Base(ABC):
                 "role": "assistant",
                 "tool_calls": [
                     {
-                        "index": tool_call.index,
+                        "index": getattr(tool_call, "index", None),
                         "id": tool_call.id,
                         "function": {
                             "name": tool_call.function.name,
@@ -325,18 +325,20 @@ class Base(ABC):
         one assistant message containing all tool_calls, followed by one tool message per call.
         results: list of (tool_call, name, args, result, error)
         """
-        hist.append({
-            "role": "assistant",
-            "tool_calls": [
-                {
-                    "index": tc.index,
-                    "id": tc.id,
-                    "function": {"name": tc.function.name, "arguments": tc.function.arguments},
-                    "type": "function",
-                }
-                for tc, _, _, _, _ in results
-            ],
-        })
+        hist.append(
+            {
+                "role": "assistant",
+                "tool_calls": [
+                    {
+                        "index": getattr(tc, "index", None),
+                        "id": tc.id,
+                        "function": {"name": tc.function.name, "arguments": tc.function.arguments},
+                        "type": "function",
+                    }
+                    for tc, _, _, _, _ in results
+                ],
+            }
+        )
         for tc, _, _, result, err in results:
             if err:
                 content = str(err)
@@ -1474,7 +1476,7 @@ class LiteLLMBase(ABC):
             "role": "assistant",
             "tool_calls": [
                 {
-                    "index": tool_call.index,
+                    "index": getattr(tool_call, "index", None),
                     "id": tool_call.id,
                     "function": {
                         "name": tool_call.function.name,
@@ -1504,7 +1506,7 @@ class LiteLLMBase(ABC):
             "role": "assistant",
             "tool_calls": [
                 {
-                    "index": tc.index,
+                    "index": getattr(tc, "index", None),
                     "id": tc.id,
                     "function": {"name": tc.function.name, "arguments": tc.function.arguments},
                     "type": "function",
@@ -1850,17 +1852,19 @@ class LiteLLMBase(ABC):
             completion_args["extra_headers"] = extra_headers
         return completion_args
 
+
 class RAGconChat(Base):
     """
     RAGcon Chat Provider - routes through LiteLLM proxy
-    
+
     All model types are handled through a unified LiteLLM endpoint.
     Default Base URL: https://connect.ragcon.com/v1
     """
+
     _FACTORY_NAME = "RAGcon"
-    
+
     def __init__(self, key, model_name, base_url=None, **kwargs):
         if not base_url:
             base_url = "https://connect.ragcon.com/v1"
-        
+
         super().__init__(key, model_name, base_url, **kwargs)