From ad789f5c437d8f20b718da12c06ce79dce971f7e Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Wed, 8 Apr 2026 13:38:30 +0800 Subject: [PATCH] Fix list files (#13960) ### What problem does this PR solve? As title. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ## Summary by CodeRabbit * **Bug Fixes** * Standardized the query parameter used when listing documents so listings behave consistently across the web and client interfaces. * Clarified the error message shown when a required dataset ID is missing to give clearer guidance to users. * **Tests** * Updated test coverage to reflect the standardized dataset identifier usage. --------- Signed-off-by: Jin Hai --- admin/client/ragflow_client.py | 2 +- api/apps/document_app.py | 4 +-- sdk/python/test/test_frontend_api/common.py | 2 +- test/testcases/test_web_api/conftest.py | 2 +- .../test_web_api/test_chunk_app/conftest.py | 2 +- .../test_document_app/conftest.py | 6 ++-- .../test_document_app/test_list_documents.py | 36 +++++++++---------- .../test_document_app/test_paser_documents.py | 12 +++---- .../test_document_app/test_rm_documents.py | 6 ++-- .../test_kb_app/test_kb_pipeline_tasks.py | 2 +- 10 files changed, 37 insertions(+), 37 deletions(-) diff --git a/admin/client/ragflow_client.py b/admin/client/ragflow_client.py index 9d9dcab814..0a8adc1699 100644 --- a/admin/client/ragflow_client.py +++ b/admin/client/ragflow_client.py @@ -1685,7 +1685,7 @@ class RAGFlowClient: time.sleep(0.5) def _list_documents(self, dataset_name: str, dataset_id: str): - response = self.http_client.request("POST", f"/document/list?kb_id={dataset_id}", use_api_base=False, + response = self.http_client.request("POST", f"/document/list?id={dataset_id}", use_api_base=False, auth_kind="web") res_json = response.json() if response.status_code != 200: diff --git a/api/apps/document_app.py b/api/apps/document_app.py index dd66144ee5..580eef6e55 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -240,9 +240,9 @@ async def create(): @manager.route("/list", methods=["POST"]) # noqa: F821 @login_required async def list_docs(): - kb_id = request.args.get("kb_id") + kb_id = request.args.get("id") if not kb_id: - return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR) + return get_json_result(data=False, message='Dataset ID is required for listing files.', code=RetCode.ARGUMENT_ERROR) tenants = UserTenantService.query(user_id=current_user.id) for tenant in tenants: if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id): diff --git a/sdk/python/test/test_frontend_api/common.py b/sdk/python/test/test_frontend_api/common.py index 63aea2c8fa..20672d1c66 100644 --- a/sdk/python/test/test_frontend_api/common.py +++ b/sdk/python/test/test_frontend_api/common.py @@ -69,7 +69,7 @@ def upload_file(auth, dataset_id, path): def list_document(auth, dataset_id): authorization = {"Authorization": auth} - url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}" + url = f"{HOST_ADDRESS}/v1/document/list?id={dataset_id}" json = {} res = requests.post(url=url, headers=authorization, json=json) return res.json() diff --git a/test/testcases/test_web_api/conftest.py b/test/testcases/test_web_api/conftest.py index c9f9beed8f..2d6569ef91 100644 --- a/test/testcases/test_web_api/conftest.py +++ b/test/testcases/test_web_api/conftest.py @@ -49,7 +49,7 @@ from utils.file_utils import ( @wait_for(30, 1, "Document parsing timeout") def condition(_auth, _kb_id): - res = list_documents(_auth, {"kb_id": _kb_id}) + res = list_documents(_auth, {"id": _kb_id}) for doc in res["data"]["docs"]: if doc["run"] != "3": return False diff --git a/test/testcases/test_web_api/test_chunk_app/conftest.py b/test/testcases/test_web_api/test_chunk_app/conftest.py index e51a2f09bf..b5daa2e65a 100644 --- a/test/testcases/test_web_api/test_chunk_app/conftest.py +++ b/test/testcases/test_web_api/test_chunk_app/conftest.py @@ -24,7 +24,7 @@ from utils import wait_for @wait_for(30, 1, "Document parsing timeout") def condition(_auth, _kb_id): - res = list_documents(_auth, {"kb_id": _kb_id}) + res = list_documents(_auth, {"id": _kb_id}) for doc in res["data"]["docs"]: if doc["run"] != "3": return False diff --git a/test/testcases/test_web_api/test_document_app/conftest.py b/test/testcases/test_web_api/test_document_app/conftest.py index b5470939bf..3a08e006ac 100644 --- a/test/testcases/test_web_api/test_document_app/conftest.py +++ b/test/testcases/test_web_api/test_document_app/conftest.py @@ -34,7 +34,7 @@ class _DummyManager: @pytest.fixture(scope="function") def add_document_func(request, WebApiAuth, add_dataset, ragflow_tmp_dir): def cleanup(): - res = list_documents(WebApiAuth, {"kb_id": dataset_id}) + res = list_documents(WebApiAuth, {"id": dataset_id}) for doc in res["data"]["docs"]: delete_document(WebApiAuth, {"doc_id": doc["id"]}) @@ -47,7 +47,7 @@ def add_document_func(request, WebApiAuth, add_dataset, ragflow_tmp_dir): @pytest.fixture(scope="class") def add_documents(request, WebApiAuth, add_dataset, ragflow_tmp_dir): def cleanup(): - res = list_documents(WebApiAuth, {"kb_id": dataset_id}) + res = list_documents(WebApiAuth, {"id": dataset_id}) for doc in res["data"]["docs"]: delete_document(WebApiAuth, {"doc_id": doc["id"]}) @@ -60,7 +60,7 @@ def add_documents(request, WebApiAuth, add_dataset, ragflow_tmp_dir): @pytest.fixture(scope="function") def add_documents_func(request, WebApiAuth, add_dataset_func, ragflow_tmp_dir): def cleanup(): - res = list_documents(WebApiAuth, {"kb_id": dataset_id}) + res = list_documents(WebApiAuth, {"id": dataset_id}) for doc in res["data"]["docs"]: delete_document(WebApiAuth, {"doc_id": doc["id"]}) diff --git a/test/testcases/test_web_api/test_document_app/test_list_documents.py b/test/testcases/test_web_api/test_document_app/test_list_documents.py index 8115220efe..e3deda0920 100644 --- a/test/testcases/test_web_api/test_document_app/test_list_documents.py +++ b/test/testcases/test_web_api/test_document_app/test_list_documents.py @@ -34,7 +34,7 @@ class TestAuthorization: ], ) def test_invalid_auth(self, invalid_auth, expected_code, expected_message): - res = list_documents(invalid_auth, {"kb_id": "dataset_id"}) + res = list_documents(invalid_auth, {"id": "dataset_id"}) assert res["code"] == expected_code assert res["message"] == expected_message @@ -43,7 +43,7 @@ class TestDocumentsList: @pytest.mark.p1 def test_default(self, WebApiAuth, add_documents): kb_id, _ = add_documents - res = list_documents(WebApiAuth, {"kb_id": kb_id}) + res = list_documents(WebApiAuth, {"id": kb_id}) assert res["code"] == 0 assert len(res["data"]["docs"]) == 5 assert res["data"]["total"] == 5 @@ -57,7 +57,7 @@ class TestDocumentsList: ], ) def test_invalid_dataset_id(self, WebApiAuth, kb_id, expected_code, expected_message): - res = list_documents(WebApiAuth, {"kb_id": kb_id}) + res = list_documents(WebApiAuth, {"id": kb_id}) assert res["code"] == expected_code assert res["message"] == expected_message @@ -76,7 +76,7 @@ class TestDocumentsList: ) def test_page(self, WebApiAuth, add_documents, params, expected_code, expected_page_size, expected_message): kb_id, _ = add_documents - res = list_documents(WebApiAuth, {"kb_id": kb_id, **params}) + res = list_documents(WebApiAuth, {"id": kb_id, **params}) assert res["code"] == expected_code, res if expected_code == 0: assert len(res["data"]["docs"]) == expected_page_size, res @@ -99,7 +99,7 @@ class TestDocumentsList: ) def test_page_size(self, WebApiAuth, add_documents, params, expected_code, expected_page_size, expected_message): kb_id, _ = add_documents - res = list_documents(WebApiAuth, {"kb_id": kb_id, **params}) + res = list_documents(WebApiAuth, {"id": kb_id, **params}) assert res["code"] == expected_code, res if expected_code == 0: assert len(res["data"]["docs"]) == expected_page_size, res @@ -119,7 +119,7 @@ class TestDocumentsList: ) def test_orderby(self, WebApiAuth, add_documents, params, expected_code, assertions, expected_message): kb_id, _ = add_documents - res = list_documents(WebApiAuth, {"kb_id": kb_id, **params}) + res = list_documents(WebApiAuth, {"id": kb_id, **params}) assert res["code"] == expected_code, res if expected_code == 0: if callable(assertions): @@ -144,7 +144,7 @@ class TestDocumentsList: ) def test_desc(self, WebApiAuth, add_documents, params, expected_code, assertions, expected_message): kb_id, _ = add_documents - res = list_documents(WebApiAuth, {"kb_id": kb_id, **params}) + res = list_documents(WebApiAuth, {"id": kb_id, **params}) assert res["code"] == expected_code, res if expected_code == 0: if callable(assertions): @@ -165,7 +165,7 @@ class TestDocumentsList: ) def test_keywords(self, WebApiAuth, add_documents, params, expected_num): kb_id, _ = add_documents - res = list_documents(WebApiAuth, {"kb_id": kb_id, **params}) + res = list_documents(WebApiAuth, {"id": kb_id, **params}) assert res["code"] == 0, res assert len(res["data"]["docs"]) == expected_num, res assert res["data"]["total"] == expected_num, res @@ -210,11 +210,11 @@ class TestDocumentsListUnit: monkeypatch.setattr(module, "get_request_json", fake_request_json) res = _run(module.list_docs()) assert res["code"] == 101 - assert res["message"] == 'Lack of "KB ID"' + assert res["message"] == 'Dataset ID is required for listing files.' def test_unauthorized_dataset(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") monkeypatch.setattr(module.UserTenantService, "query", lambda **_kwargs: [SimpleNamespace(tenant_id="tenant1")]) monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: False) @@ -228,7 +228,7 @@ class TestDocumentsListUnit: def test_return_empty_metadata_flags(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") self._allow_kb(module, monkeypatch) monkeypatch.setattr(module.DocumentService, "get_by_kb_id", lambda *_args, **_kwargs: ([], 0)) @@ -248,7 +248,7 @@ class TestDocumentsListUnit: def test_invalid_filters(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") self._allow_kb(module, monkeypatch) async def fake_request_json(): @@ -269,7 +269,7 @@ class TestDocumentsListUnit: def test_invalid_metadata_types(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") self._allow_kb(module, monkeypatch) async def fake_request_json(): @@ -290,7 +290,7 @@ class TestDocumentsListUnit: def test_metadata_condition_empty_result(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") self._allow_kb(module, monkeypatch) monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda *_args, **_kwargs: {}) monkeypatch.setattr(module, "meta_filter", lambda *_args, **_kwargs: set()) @@ -305,7 +305,7 @@ class TestDocumentsListUnit: def test_metadata_values_intersection(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") self._allow_kb(module, monkeypatch) metas = { "author": {"alice": ["doc1", "doc2"]}, @@ -334,7 +334,7 @@ class TestDocumentsListUnit: def test_metadata_intersection_empty(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") self._allow_kb(module, monkeypatch) metas = { "author": {"alice": ["doc1"]}, @@ -352,7 +352,7 @@ class TestDocumentsListUnit: def test_desc_time_and_schema(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1", desc="false", create_time_from="150", create_time_to="250") + self._set_args(module, monkeypatch, id="kb1", desc="false", create_time_from="150", create_time_to="250") self._allow_kb(module, monkeypatch) docs = [ @@ -377,7 +377,7 @@ class TestDocumentsListUnit: def test_exception_path(self, document_app_module, monkeypatch): module = document_app_module - self._set_args(module, monkeypatch, kb_id="kb1") + self._set_args(module, monkeypatch, id="kb1") self._allow_kb(module, monkeypatch) def raise_error(*_args, **_kwargs): diff --git a/test/testcases/test_web_api/test_document_app/test_paser_documents.py b/test/testcases/test_web_api/test_document_app/test_paser_documents.py index 06ee79c775..c455c256e0 100644 --- a/test/testcases/test_web_api/test_document_app/test_paser_documents.py +++ b/test/testcases/test_web_api/test_document_app/test_paser_documents.py @@ -30,7 +30,7 @@ def _run(coro): @wait_for(30, 1, "Document parsing timeout") def condition(_auth, _kb_id, _document_ids=None): - res = list_documents(_auth, {"kb_id": _kb_id}) + res = list_documents(_auth, {"id": _kb_id}) target_docs = res["data"]["docs"] if _document_ids is None: @@ -48,7 +48,7 @@ def condition(_auth, _kb_id, _document_ids=None): def validate_document_parse_done(auth, _kb_id, _document_ids): - res = list_documents(auth, {"kb_id": _kb_id}) + res = list_documents(auth, {"id": _kb_id}) for doc in res["data"]["docs"]: if doc["id"] not in _document_ids: continue @@ -60,7 +60,7 @@ def validate_document_parse_done(auth, _kb_id, _document_ids): def validate_document_parse_cancel(auth, _kb_id, _document_ids): - res = list_documents(auth, {"kb_id": _kb_id}) + res = list_documents(auth, {"id": _kb_id}) for doc in res["data"]["docs"]: if doc["id"] not in _document_ids: continue @@ -151,7 +151,7 @@ class TestDocumentsParse: def test_parse_100_files(WebApiAuth, add_dataset_func, tmp_path): @wait_for(100, 1, "Document parsing timeout") def condition(_auth, _kb_id, _document_num): - res = list_documents(_auth, {"kb_id": _kb_id, "page_size": _document_num}) + res = list_documents(_auth, {"id": _kb_id, "page_size": _document_num}) for doc in res["data"]["docs"]: if doc["run"] != "3": return False @@ -172,7 +172,7 @@ def test_parse_100_files(WebApiAuth, add_dataset_func, tmp_path): def test_concurrent_parse(WebApiAuth, add_dataset_func, tmp_path): @wait_for(120, 1, "Document parsing timeout") def condition(_auth, _kb_id, _document_num): - res = list_documents(_auth, {"kb_id": _kb_id, "page_size": _document_num}) + res = list_documents(_auth, {"id": _kb_id, "page_size": _document_num}) for doc in res["data"]["docs"]: if doc["run"] != "3": return False @@ -305,7 +305,7 @@ class TestDocumentsParseStop: def test_basic_scenarios(self, WebApiAuth, add_documents_func, payload, expected_code, expected_message): @wait_for(30, 1, "Document parsing timeout") def condition(_auth, _kb_id, _doc_ids): - res = list_documents(_auth, {"kb_id": _kb_id}) + res = list_documents(_auth, {"id": _kb_id}) for doc in res["data"]["docs"]: if doc["id"] in _doc_ids: if doc["run"] != "3": diff --git a/test/testcases/test_web_api/test_document_app/test_rm_documents.py b/test/testcases/test_web_api/test_document_app/test_rm_documents.py index ebcf570fa0..3efccb2839 100644 --- a/test/testcases/test_web_api/test_document_app/test_rm_documents.py +++ b/test/testcases/test_web_api/test_document_app/test_rm_documents.py @@ -63,7 +63,7 @@ class TestDocumentsDeletion: if res["code"] != 0: assert res["message"] == expected_message, res - res = list_documents(WebApiAuth, {"kb_id": kb_id}) + res = list_documents(WebApiAuth, {"id": kb_id}) assert len(res["data"]["docs"]) == remaining, res assert res["data"]["total"] == remaining, res @@ -124,12 +124,12 @@ def test_delete_100(WebApiAuth, add_dataset, tmp_path): documents_num = 100 kb_id = add_dataset document_ids = bulk_upload_documents(WebApiAuth, kb_id, documents_num, tmp_path) - res = list_documents(WebApiAuth, {"kb_id": kb_id}) + res = list_documents(WebApiAuth, {"id": kb_id}) assert res["data"]["total"] == documents_num, res for doc_id in document_ids: res = delete_document(WebApiAuth, {"doc_id": doc_id}) assert res["code"] == 0, res - res = list_documents(WebApiAuth, {"kb_id": kb_id}) + res = list_documents(WebApiAuth, {"id": kb_id}) assert res["data"]["total"] == 0, res diff --git a/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py b/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py index a7404badea..088d1182ac 100644 --- a/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py +++ b/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py @@ -75,7 +75,7 @@ def _wait_for_task(trace_func, auth, kb_id, task_id, timeout=60, use_params_payl def _wait_for_docs_parsed(auth, kb_id, timeout=60): @wait_for(timeout, 2, "Document parsing timeout") def _condition(): - res = list_documents(auth, {"kb_id": kb_id}) + res = list_documents(auth, {"id": kb_id}) if res["code"] != 0: return False for doc in res["data"]["docs"]: