From de18313f97ffe5f008e98e8188a4d7500ca2dae7 Mon Sep 17 00:00:00 2001
From: kpdev <156195510+kiannidev@users.noreply.github.com>
Date: Thu, 11 Jun 2026 00:51:32 -0700
Subject: [PATCH] fix(api): POST /documents/stop removes partial chunks and resets counters (#15789)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What problem does this PR solve?

`POST /api/v1/datasets/{dataset_id}/documents/stop` (`stop_parse_documents`) cancels parsing tasks and sets `run` to `CANCEL`, but it does **not** remove chunks already indexed in the doc store or reset `progress` / `chunk_num`. REST callers can end up with a “cancelled” document that still returns partial chunks in `GET .../chunks` and in retrieval.

Legacy `DELETE /api/v1/datasets/{dataset_id}/chunks` (`stop_parsing`) already performs full cleanup: it resets counters and calls `docStoreConn.delete`. This PR aligns the newer stop endpoint with that behavior so both paths leave the dataset consistent.

Fixes [#15788](https://github.com/infiniflow/ragflow/issues/15788).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

### Changes

- Update `stop_parse_documents` in `document_api.py` to reset `progress` and `chunk_num` to `0` and delete partial chunks via `docStoreConn.delete` after `cancel_all_task_of`.
- Add unit test `test_stop_parse_documents_cleans_partial_chunks` to assert counters reset and doc store delete is invoked.

### Test plan - [x] Unit test: `pytest test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py::TestDocRoutesUnit::test_stop_parse_documents_cleans_partial_chunks -v` - [ ] Manual: upload a slow document, start parse, call `POST .../documents/stop` while `RUNNING`, verify `GET .../chunks` returns zero chunks and UI `chunk_count` is 0 - [ ] Control: legacy `DELETE .../chunks` behavior unchanged --------- Co-authored-by: Wang Qi --- api/apps/restful_apis/document_api.py | 12 ++++- .../test_doc_sdk_routes_unit.py | 50 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index a4de6f9e3f..faf2445163 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -1625,7 +1625,17 @@ async def stop_parse_documents(tenant_id, dataset_id): continue cancel_all_task_of(doc_id) - DocumentService.update_by_id(doc_id, {"run": str(TaskStatus.CANCEL.value)}) + DocumentService.update_by_id( + doc_id, + { + "run": str(TaskStatus.CANCEL.value), + "progress": 0, + "chunk_num": 0, + }, + ) + index_name = search.index_name(tenant_id) + if settings.docStoreConn.index_exist(index_name, doc.kb_id): + settings.docStoreConn.delete({"doc_id": doc.id}, index_name, doc.kb_id) success_count += 1 result = {"success_count": success_count} diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py index 7e6bd4128d..2131aa533c 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py @@ -731,6 +731,56 @@ class TestDocRoutesUnit: res = _run(module.stop_parsing.__wrapped__("tenant-1", "ds-1")) assert res["code"] == 0 + def 
test_stop_parse_documents_cleans_partial_chunks(self, monkeypatch): + module = _load_doc_module(monkeypatch, module_basename="document_api") + updated = [] + deleted = [] + + monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: True) + monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"document_ids": ["doc-1"]})) + monkeypatch.setattr(module, "check_duplicate_ids", lambda ids, _kind: (ids, [])) + monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [object()]) + monkeypatch.setattr( + module.DocumentService, + "get_by_id", + lambda _id: (True, _DummyDoc(doc_id="doc-1", run=module.TaskStatus.RUNNING.value)), + ) + monkeypatch.setattr(module.TaskService, "query", lambda **_kwargs: [SimpleNamespace(progress=0.5)]) + monkeypatch.setattr(module, "cancel_all_task_of", lambda *_args, **_kwargs: None) + monkeypatch.setattr( + module.DocumentService, + "update_by_id", + lambda doc_id, info: updated.append((doc_id, info)) or True, + ) + _patch_docstore( + monkeypatch, + module, + index_exist=lambda *_args, **_kwargs: True, + delete=lambda condition, index, kb_id: deleted.append((condition, index, kb_id)), + ) + + res = _run(module.stop_parse_documents.__wrapped__("tenant-1", "ds-1")) + + assert res["code"] == 0 + assert res["data"]["success_count"] == 1 + assert updated == [ + ( + "doc-1", + { + "run": module.TaskStatus.CANCEL.value, + "progress": 0, + "chunk_num": 0, + }, + ) + ] + assert deleted == [({"doc_id": "doc-1"}, module.search.index_name("tenant-1"), "kb-1")] + + deleted.clear() + _patch_docstore(monkeypatch, module, index_exist=lambda *_args, **_kwargs: False) + res = _run(module.stop_parse_documents.__wrapped__("tenant-1", "ds-1")) + assert res["code"] == 0 + assert deleted == [] + def test_list_chunks_branches(self, monkeypatch): module = _load_restful_chunk_module(monkeypatch) monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False)