mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix(api): POST /documents/stop removes partial chunks and resets counters (#15789)
### What problem does this PR solve?
`POST /api/v1/datasets/{dataset_id}/documents/stop`
(`stop_parse_documents`) cancels parsing tasks and sets `run` to
`CANCEL`, but it does **not** remove chunks already indexed in the doc
store or reset `progress` / `chunk_num`. REST callers can end up with a
“cancelled” document that still returns partial chunks in `GET
.../chunks` and in retrieval.
Legacy `DELETE /api/v1/datasets/{dataset_id}/chunks` (`stop_parsing`)
already performs full cleanup: it resets counters and calls
`docStoreConn.delete`. This PR aligns the newer stop endpoint with that
behavior so both paths leave the dataset consistent.
Fixes [#15788](https://github.com/infiniflow/ragflow/issues/15788).
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
### Changes
- Update `stop_parse_documents` in `document_api.py` to reset `progress`
and `chunk_num` to `0` and delete partial chunks via
`docStoreConn.delete` after `cancel_all_task_of`.
- Add unit test `test_stop_parse_documents_cleans_partial_chunks` to
assert counters reset and doc store delete is invoked.
### Test plan
- [x] Unit test: `pytest
test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py::TestDocRoutesUnit::test_stop_parse_documents_cleans_partial_chunks
-v`
- [ ] Manual: upload a document that is slow to parse, start parsing, call `POST
.../documents/stop` while the document's `run` status is `RUNNING`, then verify that
`GET .../chunks` returns zero chunks and the UI `chunk_count` is 0
- [ ] Control: legacy `DELETE .../chunks` behavior unchanged
---------
Co-authored-by: Wang Qi <wangq8@outlook.com>
This commit is contained in:
@@ -1625,7 +1625,17 @@ async def stop_parse_documents(tenant_id, dataset_id):
|
||||
continue
|
||||
|
||||
cancel_all_task_of(doc_id)
|
||||
DocumentService.update_by_id(doc_id, {"run": str(TaskStatus.CANCEL.value)})
|
||||
DocumentService.update_by_id(
|
||||
doc_id,
|
||||
{
|
||||
"run": str(TaskStatus.CANCEL.value),
|
||||
"progress": 0,
|
||||
"chunk_num": 0,
|
||||
},
|
||||
)
|
||||
index_name = search.index_name(tenant_id)
|
||||
if settings.docStoreConn.index_exist(index_name, doc.kb_id):
|
||||
settings.docStoreConn.delete({"doc_id": doc.id}, index_name, doc.kb_id)
|
||||
success_count += 1
|
||||
|
||||
result = {"success_count": success_count}
|
||||
|
||||
@@ -731,6 +731,56 @@ class TestDocRoutesUnit:
|
||||
res = _run(module.stop_parsing.__wrapped__("tenant-1", "ds-1"))
|
||||
assert res["code"] == 0
|
||||
|
||||
def test_stop_parse_documents_cleans_partial_chunks(self, monkeypatch):
    """Check that stopping a running document resets counters and drops partial chunks.

    Two scenarios are exercised against ``stop_parse_documents``:

    1. The doc-store index exists: the document row is updated with the
       cancel status plus zeroed ``progress`` / ``chunk_num``, and the doc
       store ``delete`` is invoked for that document.
    2. The doc-store index does not exist: the row is still reset, but no
       doc-store ``delete`` is issued.
    """
    module = _load_doc_module(monkeypatch, module_basename="document_api")
    updated = []
    deleted = []

    # The endpoint writes str(TaskStatus.CANCEL.value); mirror that coercion
    # so the expectation does not depend on the enum's underlying value type.
    expected_update = (
        "doc-1",
        {
            "run": str(module.TaskStatus.CANCEL.value),
            "progress": 0,
            "chunk_num": 0,
        },
    )

    monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: True)
    monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"document_ids": ["doc-1"]}))
    monkeypatch.setattr(module, "check_duplicate_ids", lambda ids, _kind: (ids, []))
    monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [object()])
    monkeypatch.setattr(
        module.DocumentService,
        "get_by_id",
        lambda _id: (True, _DummyDoc(doc_id="doc-1", run=module.TaskStatus.RUNNING.value)),
    )
    monkeypatch.setattr(module.TaskService, "query", lambda **_kwargs: [SimpleNamespace(progress=0.5)])
    monkeypatch.setattr(module, "cancel_all_task_of", lambda *_args, **_kwargs: None)
    # Record every row update; ``or True`` keeps the stub's return value truthy.
    monkeypatch.setattr(
        module.DocumentService,
        "update_by_id",
        lambda doc_id, info: updated.append((doc_id, info)) or True,
    )

    # Scenario 1: index exists, so partial chunks must be deleted.
    _patch_docstore(
        monkeypatch,
        module,
        index_exist=lambda *_args, **_kwargs: True,
        delete=lambda condition, index, kb_id: deleted.append((condition, index, kb_id)),
    )

    res = _run(module.stop_parse_documents.__wrapped__("tenant-1", "ds-1"))

    assert res["code"] == 0
    assert res["data"]["success_count"] == 1
    assert updated == [expected_update]
    # "kb-1" is presumably the default kb_id of _DummyDoc (defined elsewhere in this file).
    assert deleted == [({"doc_id": "doc-1"}, module.search.index_name("tenant-1"), "kb-1")]

    # Scenario 2: index missing, so counters are still reset but nothing is deleted.
    updated.clear()
    deleted.clear()
    _patch_docstore(monkeypatch, module, index_exist=lambda *_args, **_kwargs: False)
    res = _run(module.stop_parse_documents.__wrapped__("tenant-1", "ds-1"))
    assert res["code"] == 0
    assert updated == [expected_update]
    assert deleted == []
|
||||
|
||||
def test_list_chunks_branches(self, monkeypatch):
|
||||
module = _load_restful_chunk_module(monkeypatch)
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False)
|
||||
|
||||
Reference in New Issue
Block a user