fix: handle missing 'total' key causing KeyError in deep research retrieval (#13942)

## Summary

- When KB retrieval fails (e.g. ES `AssertionError` on empty
`index_names`), `kbinfos` falls back to a dict without a `total` key
- `_async_update_chunk_info` then iterates over `chunk_info.keys()`
(which includes `total`) and tries `kbinfos['total']`, raising a
`KeyError`
- This error surfaces when using Tavily web retrieval in a chat with no
knowledge base attached

## Changes

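- Add `'total': 0` to all default `kbinfos` dicts in
`_retrieve_information`
- Add `setdefault('total', 0)` guard after successful KB retrieval to
handle cases where the retrieval result omits the key
- Accumulate `total` correctly in the merge branch of
`_async_update_chunk_info`
- Wrap the `_research` call in `research` with `try`/`except`/`finally`
so an unhandled exception is logged and `<END_DEEP_RESEARCH>` is still
sent to the callback
- Read `is_sufficient` and `missing_information` from the sufficiency
check result with `.get()` so a result missing either key does not
raise a `KeyError`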

## Test plan

- [ ] Start a chat with Tavily configured and no knowledge base
- [ ] Verify no `KeyError: 'total'` is raised
- [ ] Verify Tavily results are returned correctly

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
VincentLambert
2026-05-09 04:57:51 +02:00
committed by GitHub
parent 870bc59365
commit 4f3711d37f

View File

@@ -41,9 +41,10 @@ class TreeStructuredQueryDecompositionRetrieval:
async def _retrieve_information(self, search_query): async def _retrieve_information(self, search_query):
"""Retrieve information from different sources""" """Retrieve information from different sources"""
# 1. Knowledge base retrieval # 1. Knowledge base retrieval
kbinfos = [] kbinfos = {"total": 0, "chunks": [], "doc_aggs": []}
try: try:
kbinfos = await self._kb_retrieve(question=search_query) if self._kb_retrieve else {"chunks": [], "doc_aggs": []} kbinfos = await self._kb_retrieve(question=search_query) if self._kb_retrieve else {"total": 0, "chunks": [], "doc_aggs": []}
kbinfos.setdefault("total", 0)
except Exception as e: except Exception as e:
logging.error(f"Knowledge base retrieval error: {e}") logging.error(f"Knowledge base retrieval error: {e}")
@@ -87,12 +88,18 @@ class TreeStructuredQueryDecompositionRetrieval:
if d["doc_id"] not in dids: if d["doc_id"] not in dids:
chunk_info["doc_aggs"].append(d) chunk_info["doc_aggs"].append(d)
chunk_info["total"] = chunk_info.get("total", 0) + kbinfos.get("total", 0)
async def research(self, chunk_info, question, query, depth=3, callback=None): async def research(self, chunk_info, question, query, depth=3, callback=None):
if callback: if callback:
await callback("<START_DEEP_RESEARCH>") await callback("<START_DEEP_RESEARCH>")
await self._research(chunk_info, question, query, depth, callback) try:
if callback: await self._research(chunk_info, question, query, depth, callback)
await callback("<END_DEEP_RESEARCH>") except Exception:
logging.exception("Unhandled exception in deep research for query: %s", query)
finally:
if callback:
await callback("<END_DEEP_RESEARCH>")
async def _research(self, chunk_info, question, query, depth=3, callback=None): async def _research(self, chunk_info, question, query, depth=3, callback=None):
if depth == 0: if depth == 0:
@@ -111,14 +118,14 @@ class TreeStructuredQueryDecompositionRetrieval:
if callback: if callback:
await callback("Checking the sufficiency for retrieved information.") await callback("Checking the sufficiency for retrieved information.")
suff = await sufficiency_check(self.chat_mdl, question, ret) suff = await sufficiency_check(self.chat_mdl, question, ret)
if suff["is_sufficient"]: if suff.get("is_sufficient"):
if callback: if callback:
await callback(f"Yes, the retrieved information is sufficient for '{question}'.") await callback(f"Yes, the retrieved information is sufficient for '{question}'.")
return ret return ret
#if callback: #if callback:
# await callback("The retrieved information is not sufficient. Planing next steps...") # await callback("The retrieved information is not sufficient. Planing next steps...")
succ_question_info = await multi_queries_gen(self.chat_mdl, question, query, suff["missing_information"], ret) succ_question_info = await multi_queries_gen(self.chat_mdl, question, query, suff.get("missing_information", []), ret)
if callback: if callback:
await callback("Next step is to search for the following questions:</br> - " + "</br> - ".join(step["question"] for step in succ_question_info["questions"])) await callback("Next step is to search for the following questions:</br> - " + "</br> - ".join(step["question"] for step in succ_question_info["questions"]))
steps = [] steps = []