diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index 4b138fd564..8d72ee9bf8 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -183,22 +183,6 @@ async def create():
         return server_error_response(e)
 
 
-@manager.route("/infos", methods=["POST"])  # noqa: F821
-@login_required
-async def doc_infos():
-    req = await get_request_json()
-    doc_ids = req["doc_ids"]
-    for doc_id in doc_ids:
-        if not DocumentService.accessible(doc_id, current_user.id):
-            return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
-    docs = DocumentService.get_by_ids(doc_ids)
-    docs_list = list(docs.dicts())
-    # Add meta_fields for each document
-    for doc in docs_list:
-        doc["meta_fields"] = DocMetadataService.get_document_metadata(doc["id"])
-    return get_json_result(data=docs_list)
-
-
 @manager.route("/metadata/update", methods=["POST"])  # noqa: F821
 @login_required
 @validate_request("doc_ids")
diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py
index a18ca20805..119b4be208 100644
--- a/api/apps/restful_apis/document_api.py
+++ b/api/apps/restful_apis/document_api.py
@@ -527,6 +527,13 @@ def _get_docs_with_request(req, dataset_id:str):
     if doc_name and not DocumentService.query(name=doc_name, kb_id=dataset_id):
         return RetCode.DATA_ERROR, f"You don't own the document {doc_name}.", [], 0
 
+    doc_ids = q.getlist("ids")
+    if doc_id and len(doc_ids) > 0:
+        # Same 4-tuple shape as the error return above, so callers can unpack it.
+        return RetCode.DATA_ERROR, f"Should not provide both 'id':{doc_id} and 'ids':{doc_ids}", [], 0
+    if len(doc_ids) > 0:
+        doc_ids_filter = doc_ids
+
     docs, total = DocumentService.get_by_kb_id(dataset_id, page, page_size, orderby, desc, keywords, run_status_converted, types, suffix,
                                                name=doc_name, doc_ids=doc_ids_filter, return_empty_metadata=return_empty_metadata)
 
diff --git a/sdk/python/ragflow_sdk/modules/dataset.py b/sdk/python/ragflow_sdk/modules/dataset.py
index 158cebfa81..b464fe70de 100644
--- a/sdk/python/ragflow_sdk/modules/dataset.py
+++ b/sdk/python/ragflow_sdk/modules/dataset.py
@@ -66,6 +66,7 @@ class DataSet(Base):
     def list_documents(
         self,
         id: str | None = None,
+        ids: list[str] | None = None,
         name: str | None = None,
         keywords: str | None = None,
         page: int = 1,
@@ -75,6 +76,10 @@ class DataSet(Base):
         create_time_from: int = 0,
         create_time_to: int = 0,
     ):
+        # Validate that id and ids are not used together
+        if id and ids:
+            raise ValueError("Cannot use both 'id' and 'ids' parameters at the same time.")
+
         params = {
             "id": id,
             "name": name,
@@ -86,6 +91,10 @@ class DataSet(Base):
             "create_time_from": create_time_from,
             "create_time_to": create_time_to,
         }
+        # Send ids as a list value: requests encodes it as repeated
+        # query params (ids=a&ids=b); params is a dict, so it has no append().
+        if ids:
+            params["ids"] = ids
         res = self.get(f"/datasets/{self.id}/documents", params=params)
         res = res.json()
         documents = []
diff --git a/sdk/python/test/test_frontend_api/common.py b/sdk/python/test/test_frontend_api/common.py
index 20672d1c66..e054bba8f3 100644
--- a/sdk/python/test/test_frontend_api/common.py
+++ b/sdk/python/test/test_frontend_api/common.py
@@ -75,11 +75,36 @@ def list_document(auth, dataset_id):
     return res.json()
 
 
-def get_docs_info(auth, doc_ids):
+def get_docs_info(auth, dataset_id, doc_ids=None, doc_id=None):
+    """
+    Get document information by IDs.
+
+    Args:
+        auth: Authorization header
+        dataset_id: Dataset ID
+        doc_ids: List of document IDs (use for multiple) - exclusive with doc_id
+        doc_id: Single document ID (use for one) - exclusive with doc_ids
+
+    Raises:
+        ValueError: If both doc_id and doc_ids are provided
+    """
+    # Validate that id and ids are not used together
+    if doc_id and doc_ids:
+        raise ValueError("Cannot use both 'id' and 'ids' parameters at the same time.")
+
     authorization = {"Authorization": auth}
-    json_req = {"doc_ids": doc_ids}
-    url = f"{HOST_ADDRESS}/v1/document/infos"
-    res = requests.post(url=url, headers=authorization, json=json_req)
+    params = {}
+    if doc_ids:
+        # Multiple IDs: a list value is encoded by requests as
+        # repeated "ids" query params (a dict has no append()).
+        params["ids"] = doc_ids
+    elif doc_id:
+        # Single ID
+        params["id"] = doc_id
+
+    # Use /api/v1 prefix for dataset API
+    url = f"{HOST_ADDRESS}/api/v1/datasets/{dataset_id}/documents"
+    res = requests.get(url=url, headers=authorization, params=params)
     return res.json()
 
 
diff --git a/sdk/python/test/test_frontend_api/test_chunk.py b/sdk/python/test/test_frontend_api/test_chunk.py
index afcab865d5..fadeb10ee2 100644
--- a/sdk/python/test/test_frontend_api/test_chunk.py
+++ b/sdk/python/test/test_frontend_api/test_chunk.py
@@ -48,14 +48,14 @@ def test_parse_txt_document(get_auth):
     for doc in res['data']['docs']:
         doc_id_list.append(doc['id'])
 
-    res = get_docs_info(get_auth, doc_id_list)
+    res = get_docs_info(get_auth, dataset_id, doc_ids=doc_id_list)
     print(doc_id_list)
     doc_count = len(doc_id_list)
     res = parse_docs(get_auth, doc_id_list)
 
     start_ts = timer()
     while True:
-        res = get_docs_info(get_auth, doc_id_list)
+        res = get_docs_info(get_auth, dataset_id, doc_ids=doc_id_list)
         finished_count = 0
-        for doc_info in res['data']:
+        for doc_info in res['data']['docs']:
             if doc_info['progress'] == 1:
diff --git a/test/testcases/test_web_api/test_common.py b/test/testcases/test_web_api/test_common.py
index 621246343e..bab80fca2a 100644
--- a/test/testcases/test_web_api/test_common.py
+++ b/test/testcases/test_web_api/test_common.py
@@ -397,8 +397,8 @@ def document_filter(auth, dataset_id, payload=None, *, headers=HEADERS, data=Non
     return res.json()
 
 
-def document_infos(auth, payload=None, *, headers=HEADERS, data=None):
-    res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/infos", headers=headers, auth=auth, json=payload, data=data)
+def document_infos(auth, dataset_id, params=None, payload=None, *, headers=HEADERS, data=None):
+    res = requests.get(url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents", params=params, json=payload, headers=headers, auth=auth, data=data)
     return res.json()
 
 
diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py
index 84d7e509c0..8dacada2d1 100644
--- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py
+++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py
@@ -44,7 +44,7 @@ class TestAuthorization:
     @pytest.mark.p2
     @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
     def test_infos_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
-        res = document_infos(invalid_auth, {"doc_ids": ["doc_id"]})
+        res = document_infos(invalid_auth, "kb_id", {"ids": ["doc_id"]})
         assert res["code"] == expected_code, res
         assert expected_fragment in res["message"], res
 
@@ -91,11 +91,12 @@ class TestDocumentMetadata:
 
     @pytest.mark.p2
     def test_infos(self, WebApiAuth, add_document_func):
-        _, doc_id = add_document_func
-        res = document_infos(WebApiAuth, {"doc_ids": [doc_id]})
+        dataset_id, doc_id = add_document_func
+        res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]})
         assert res["code"] == 0, res
-        assert len(res["data"]) == 1, res
-        assert res["data"][0]["id"] == doc_id, res
+        docs = res["data"]["docs"]
+        assert len(docs) == 1, docs
+        assert docs[0]["id"] == doc_id, res
 
     ## The inputs has been changed to add 'doc_ids'
     ## TODO:
@@ -138,20 +139,22 @@ class TestDocumentMetadata:
 
     @pytest.mark.p2
     def test_change_status(self, WebApiAuth, add_document_func):
-        _, doc_id = add_document_func
+        dataset_id, doc_id = add_document_func
         res = document_change_status(WebApiAuth, {"doc_ids": [doc_id], "status": "1"})
+
         assert res["code"] == 0, res
         assert res["data"][doc_id]["status"] == "1", res
-        info_res = document_infos(WebApiAuth, {"doc_ids": [doc_id]})
+        info_res = document_infos(WebApiAuth, dataset_id, {"ids": [doc_id]})
+
         assert info_res["code"] == 0, info_res
-        assert info_res["data"][0]["status"] == "1", info_res
+        assert info_res["data"]["docs"][0]["status"] == "1", info_res
 
 
 class TestDocumentMetadataNegative:
     @pytest.mark.p2
     def test_filter_missing_kb_id(self, WebApiAuth, add_document_func):
         kb_id, doc_id = add_document_func
-        res = document_filter(WebApiAuth, "", {"doc_ids": [doc_id]})
+        res = document_filter(WebApiAuth, "", {"ids": [doc_id]})
         assert res["code"] == 100, res
         assert "" == res["message"], res
 
@@ -228,26 +231,6 @@ class TestDocumentMetadataUnit:
         monkeypatch.setattr(module.UserTenantService, "query", lambda **_kwargs: [SimpleNamespace(tenant_id=tenant_id)])
         monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: True if _kwargs.get("id") == kb_id else False)
 
-
-    def test_infos_meta_fields(self, document_app_module, monkeypatch):
-        module = document_app_module
-        monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: True)
-
-        class _Docs:
-            def dicts(self):
-                return [{"id": "doc1"}]
-
-        monkeypatch.setattr(module.DocumentService, "get_by_ids", lambda _ids: _Docs())
-        monkeypatch.setattr(module.DocMetadataService, "get_document_metadata", lambda _doc_id: {"author": "alice"})
-
-        async def fake_request_json():
-            return {"doc_ids": ["doc1"]}
-
-        monkeypatch.setattr(module, "get_request_json", fake_request_json)
-        res = _run(module.doc_infos())
-        assert res["code"] == 0
-        assert res["data"][0]["meta_fields"]["author"] == "alice"
-
     def test_metadata_update_missing_kb_id(self, document_app_module, monkeypatch):
         module = document_app_module
 
diff --git a/web/src/services/knowledge-service.ts b/web/src/services/knowledge-service.ts
index de31515030..ac5633a5d0 100644
--- a/web/src/services/knowledge-service.ts
+++ b/web/src/services/knowledge-service.ts
@@ -34,7 +34,6 @@ const {
   documentUpload,
   webCrawl,
   knowledgeGraph,
-  documentInfos,
   listTagByKnowledgeIds,
   setMeta,
   getMeta,
@@ -101,10 +100,6 @@ const methods = {
     url: webCrawl,
     method: 'post',
   },
-  documentInfos: {
-    url: documentInfos,
-    method: 'post',
-  },
   setMeta: {
     url: setMeta,
     method: 'post',
diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts
index 40433d0c0c..3f749a833f 100644
--- a/web/src/utils/api.ts
+++ b/web/src/utils/api.ts
@@ -123,7 +123,6 @@ export default {
   documentUpload: (datasetId: string) =>
     `${restAPIv1}/datasets/${datasetId}/documents`,
   webCrawl: `${webAPI}/document/web_crawl`,
-  documentInfos: `${webAPI}/document/infos`,
   uploadAndParse: `${webAPI}/document/upload_info`,
   setMeta: `${webAPI}/document/set_meta`,
   getDatasetFilter: (datasetId: string) =>