From c08ced09a7356940dbc1ab3510681e7c77f5ed59 Mon Sep 17 00:00:00 2001
From: buua436
Date: Wed, 29 Apr 2026 14:44:31 +0800
Subject: [PATCH] Fix: add retrieval fallback comments (#14457)

### What problem does this PR solve?

add retrieval fallback comments

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 rag/nlp/search.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index 23e86cb9db..8563e9bd58 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -74,6 +74,11 @@ class Dealer:
         return await thread_pool_exec(_load)
 
     async def _prune_deleted_chunks(self, sres: SearchResult) -> SearchResult:
+        # Temporary safety net:
+        # Some delete paths can leave stale chunks in the doc store if the DB row
+        # is removed but the vector record is not fully cleaned up. We filter those
+        # chunks here so chat/retrieval does not surface content from deleted docs.
+        # Keep this as a fallback, not as the primary delete mechanism.
         chunk_doc_ids = [chunk.get("doc_id") for chunk in sres.field.values() if chunk and chunk.get("doc_id")]
         if not chunk_doc_ids:
             return sres
@@ -488,6 +493,8 @@ class Dealer:
 
         sres = await self.search(req, [index_name(tid) for tid in tenant_ids], kb_ids, embd_mdl, highlight,
                                  rank_feature=rank_feature)
+        # Temporary retrieval-side guard: prune chunks whose parent document no
+        # longer exists before reranking and returning results.
         sres = await self._prune_deleted_chunks(sres)
         if sres.total == 0:
             ranks["doc_aggs"] = []