From 2971849783b879749ae25732f202e42acf0a3943 Mon Sep 17 00:00:00 2001 From: jaso0n0818 Date: Thu, 11 Jun 2026 18:30:03 +1000 Subject: [PATCH] fix: guard docStoreConn.delete with index_exist in parse and stop_parsing (#15876) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What problem does this PR solve? Closes #15874 Both the `POST /api/v1/datasets/<dataset_id>/chunks` (re-parse) and `DELETE /api/v1/datasets/<dataset_id>/chunks` (stop-parsing) handlers called `settings.docStoreConn.delete` unconditionally. When the tenant/dataset index has not been created yet — fresh dataset, first parse interrupted before any chunks were indexed, or index manually removed — the delete call throws and the handler returns HTTP 500 **after** the document state was already mutated (RUNNING with zeroed counters for the parse path; CANCEL with zeroed counters for the stop path), leaving the document in an inconsistent state. The newer `parse_documents` path in `document_api.py` already uses `index_exist` before deleting. ## How to fix? Apply the same `index_exist` guard to both call sites in `chunk_api.py`: - **`parse`** (POST path, line ~192): guard the delete before `TaskService.filter_delete`. - **`stop_parsing`** (DELETE path, line ~242): guard the delete after `DocumentService.update_by_id`. Both sites already have the correct `search.index_name(tenant_id)` and `dataset_id` parameters; the guard is a one-line addition at each site. 
## Type of change - [x] Bug fix (non-breaking change which fixes an issue) --------- Co-authored-by: Cursor Co-authored-by: Wang Qi --- api/apps/restful_apis/chunk_api.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/api/apps/restful_apis/chunk_api.py b/api/apps/restful_apis/chunk_api.py index 8e5058bcfa..47c3c9dac9 100644 --- a/api/apps/restful_apis/chunk_api.py +++ b/api/apps/restful_apis/chunk_api.py @@ -190,7 +190,16 @@ async def parse(tenant_id, dataset_id): == 0 ): return get_error_data_result("Can't parse document that is currently being processed") - settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id) + index_name = search.index_name(tenant_id) + if settings.docStoreConn.index_exist(index_name, dataset_id): + settings.docStoreConn.delete({"doc_id": id}, index_name, dataset_id) + else: + logging.info( + "Skipping chunk delete during parse for doc %s: index %s/%s does not exist", + id, + index_name, + dataset_id, + ) TaskService.filter_delete([Task.doc_id == id]) e, doc = DocumentService.get_by_id(id) doc = doc.to_dict() @@ -240,7 +249,16 @@ async def stop_parsing(tenant_id, dataset_id): cancel_all_task_of(id) info = {"run": "2", "progress": 0, "chunk_num": 0} DocumentService.update_by_id(id, info) - settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id) + index_name = search.index_name(tenant_id) + if settings.docStoreConn.index_exist(index_name, dataset_id): + settings.docStoreConn.delete({"doc_id": doc[0].id}, index_name, dataset_id) + else: + logging.info( + "Skipping chunk delete during stop_parsing for doc %s: index %s/%s does not exist", + doc[0].id, + index_name, + dataset_id, + ) success_count += 1 if duplicate_messages: if success_count > 0: