diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 1f443374fb..faa29fb59d 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -423,29 +423,6 @@ async def doc_infos(): return get_json_result(data=docs_list) -@manager.route("/metadata/summary", methods=["POST"]) # noqa: F821 -@login_required -async def metadata_summary(): - req = await get_request_json() - kb_id = req.get("kb_id") - doc_ids = req.get("doc_ids") - if not kb_id: - return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR) - - tenants = UserTenantService.query(user_id=current_user.id) - for tenant in tenants: - if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id): - break - else: - return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR) - - try: - summary = DocMetadataService.get_metadata_summary(kb_id, doc_ids) - return get_json_result(data={"summary": summary}) - except Exception as e: - return server_error_response(e) - - @manager.route("/metadata/update", methods=["POST"]) # noqa: F821 @login_required @validate_request("doc_ids") diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 669f895a96..9ce7bf6670 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -15,6 +15,7 @@ # import logging +from quart import request from peewee import OperationalError from pydantic import ValidationError @@ -25,7 +26,8 @@ from api.db.services.document_service import DocumentService from api.db.services.knowledgebase_service import KnowledgebaseService from common.constants import RetCode from api.apps import login_required -from api.utils.api_utils import get_error_data_result, get_result, add_tenant_id_to_kwargs, get_request_json +from api.utils.api_utils import get_error_data_result, get_result, add_tenant_id_to_kwargs, get_request_json, \ + server_error_response from api.utils.validation_utils import ( UpdateDocumentReq, format_validation_error_message, ) @@ -144,3 +146,40 @@ async def update_document(tenant_id, dataset_id, document_id): renamed_doc = rename_doc_key(doc) return get_result(data=renamed_doc) + +@manager.route("/datasets//metadata/summary", methods=["GET"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def metadata_summary(dataset_id, tenant_id): + """ + Get metadata summary for a dataset. + --- + tags: + - Documents + security: + - ApiKeyAuth: [] + parameters: + - in: path + name: dataset_id + type: string + required: true + description: ID of the dataset. + - in: query + name: doc_ids + type: string + required: false + description: Comma-separated document IDs to filter metadata. + responses: + 200: + description: Metadata summary retrieved successfully. + """ + if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id): + return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ") + # Get doc_ids from query parameters (comma-separated string) + doc_ids_param = request.args.get("doc_ids", "") + doc_ids = doc_ids_param.split(",") if doc_ids_param else None + try: + summary = DocMetadataService.get_metadata_summary(dataset_id, doc_ids) + return get_result(data={"summary": summary}) + except Exception as e: + return server_error_response(e) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 72964ae35a..d3b54c43b4 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -451,19 +451,6 @@ def list_docs(dataset_id, tenant_id): return get_result(data={"total": total, "docs": output_docs}) -@manager.route("/datasets//metadata/summary", methods=["GET"]) # noqa: F821 -@token_required -async def metadata_summary(dataset_id, tenant_id): - if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id): - return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ") - req = await get_request_json() - try: - summary = DocMetadataService.get_metadata_summary(dataset_id, req.get("doc_ids")) - return get_result(data={"summary": summary}) - except Exception as e: - return server_error_response(e) - - @manager.route("/datasets//metadata/update", methods=["POST"]) # noqa: F821 @token_required async def metadata_batch_update(dataset_id, tenant_id): diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py index 8f4ea7653d..ce06f2bad1 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py @@ -472,7 +472,7 @@ class TestDocRoutesUnit: assert res["code"] == 0 assert res["data"]["docs"] == [] - def test_metadata_summary_and_batch_update(self, monkeypatch): + def test_metadata_batch_update(self, monkeypatch): module = _load_doc_module(monkeypatch) monkeypatch.setattr(module, "convert_conditions", lambda cond: cond) monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False) @@ -480,22 +480,7 @@ class TestDocRoutesUnit: res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1")) assert "don't own the dataset" in res["message"] - monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False) - res = _run(module.metadata_summary.__wrapped__("ds-1", "tenant-1")) - assert "don't own the dataset" in res["message"] - monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: True) - monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"doc_ids": ["d1"]})) - monkeypatch.setattr(module.DocMetadataService, "get_metadata_summary", lambda *_args, **_kwargs: {"k": 1}) - res = _run(module.metadata_summary.__wrapped__("ds-1", "tenant-1")) - assert res["code"] == 0 - assert res["data"]["summary"] == {"k": 1} - - monkeypatch.setattr(module.DocMetadataService, "get_metadata_summary", lambda *_args, **_kwargs: (_ for _ in ()).throw(RuntimeError("x"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.metadata_summary.__wrapped__("ds-1", "tenant-1")) - assert res["code"] == 500 - monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"selector": [1]})) res = _run(module.metadata_batch_update.__wrapped__("ds-1", "tenant-1")) assert res["message"] == "selector must be an object." @@ -576,6 +561,7 @@ class TestDocRoutesUnit: assert res["data"]["updated"] == 1 assert res["data"]["matched_docs"] == 1 + def test_delete_branches(self, monkeypatch): module = _load_doc_module(monkeypatch) monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False) diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_summary.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_summary.py index 0791ead388..4c231277b1 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_summary.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_summary.py @@ -13,11 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# Although the docs group this under "chunk management," the backend aggregates -# Document.meta_fields via document_service#get_metadata_summary and the test -# uses update_document, so it belongs with file/document management tests. -# import pytest -#from common import metadata_summary, update_document +import pytest +from common import metadata_summary, update_document def _summary_to_counts(summary): @@ -30,29 +27,63 @@ def _summary_to_counts(summary): class TestMetadataSummary: - pass + @pytest.mark.p2 + def test_metadata_summary_missing_kb_id(self, HttpApiAuth, add_document_func): + """ + Call with non-existent dataset + :param HttpApiAuth: + :param add_document_func: + :return: + """ + res = metadata_summary(HttpApiAuth, "") + assert res["code"] == 404, res + assert res["message"] == "Not Found: /api/v1/datasets//metadata/summary", res - # Alteration of API - # TODO - #@pytest.mark.p2 - #def test_metadata_summary_counts(self, HttpApiAuth, add_documents_func): - # dataset_id, document_ids = add_documents_func - # payloads = [ - # {"tags": ["foo", "bar"], "author": "alice"}, - # {"tags": ["foo"], "author": "bob"}, - # {"tags": ["bar", "baz"], "author": None}, - # ] - # for doc_id, meta_fields in zip(document_ids, payloads): - # res = update_document(HttpApiAuth, dataset_id, doc_id, {"meta_fields": meta_fields}) - # assert res["code"] == 0, res + @pytest.mark.p2 + def test_metadata_summary_invalid_kb_id(self, HttpApiAuth, add_document_func): + """Test metadata summary when user doesn't have access to the dataset.""" + kb_id, doc_id = add_document_func + invalid_kb_id = "invalid_" + kb_id + # Call with a dataset that the user doesn't have access to + res = metadata_summary(HttpApiAuth, invalid_kb_id) + assert res["code"] == 102, res + assert res["message"] == f"You don't own the dataset {invalid_kb_id}. " - # res = metadata_summary(HttpApiAuth, dataset_id) - # assert res["code"] == 0, res - # summary = res["data"]["summary"] - # counts = _summary_to_counts(summary) - # assert counts["tags"]["foo"] == 2, counts - # assert counts["tags"]["bar"] == 2, counts - # assert counts["tags"]["baz"] == 1, counts - # assert counts["author"]["alice"] == 1, counts - # assert counts["author"]["bob"] == 1, counts - # assert "None" not in counts["author"], counts + @pytest.mark.p2 + def test_metadata_summary_success(self, HttpApiAuth, add_document_func): + """Test metadata summary success case""" + kb_id, doc_id = add_document_func + # Test successful case + res = metadata_summary(HttpApiAuth, kb_id) + assert res["code"] == 0, res + assert "summary" in res["data"], res + + @pytest.mark.p2 + def test_metadata_summary_counts(self, HttpApiAuth, add_documents_func): + """ + test normal cases + :param HttpApiAuth: + :param add_documents_func: + :return: + """ + dataset_id, document_ids = add_documents_func + payloads = [ + {"tags": ["foo", "bar"], "author": "alice"}, + {"tags": ["foo"], "author": "bob"}, + {"tags": ["bar", "baz"], "author": ""}, + ] + for doc_id, meta_fields in zip(document_ids, payloads): + res = update_document(HttpApiAuth, dataset_id, doc_id, {"meta_fields": meta_fields}) + assert res["code"] == 0, res + + res = metadata_summary(HttpApiAuth, dataset_id) + assert res["code"] == 0, res + + summary = res["data"]["summary"] + counts = _summary_to_counts(summary) + assert counts["tags"]["foo"] == 2, counts + assert counts["tags"]["bar"] == 2, counts + assert counts["tags"]["baz"] == 1, counts + assert counts["author"]["alice"] == 1, counts + assert counts["author"]["bob"] == 1, counts + assert "None" not in counts["author"], counts diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 428295eb02..072ed6b89d 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -319,48 +319,6 @@ class TestDocumentMetadataUnit: assert res["code"] == 0 assert res["data"][0]["meta_fields"]["author"] == "alice" - def test_metadata_summary_missing_kb_id(self, document_app_module, monkeypatch): - module = document_app_module - - async def fake_request_json(): - return {"doc_ids": ["doc1"]} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - res = _run(module.metadata_summary()) - assert res["code"] == 101 - - def test_metadata_summary_unauthorized(self, document_app_module, monkeypatch): - module = document_app_module - monkeypatch.setattr(module.UserTenantService, "query", lambda **_kwargs: [SimpleNamespace(tenant_id="tenant1")]) - monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: False) - - async def fake_request_json(): - return {"kb_id": "kb1", "doc_ids": ["doc1"]} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - res = _run(module.metadata_summary()) - assert res["code"] == 103 - - def test_metadata_summary_success_and_exception(self, document_app_module, monkeypatch): - module = document_app_module - self._allow_kb(module, monkeypatch) - monkeypatch.setattr(module.DocMetadataService, "get_metadata_summary", lambda *_args, **_kwargs: {"author": {"alice": 1}}) - - async def fake_request_json(): - return {"kb_id": "kb1", "doc_ids": ["doc1"]} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - res = _run(module.metadata_summary()) - assert res["code"] == 0 - assert "summary" in res["data"] - - def raise_error(*_args, **_kwargs): - raise RuntimeError("boom") - - monkeypatch.setattr(module.DocMetadataService, "get_metadata_summary", raise_error) - res = _run(module.metadata_summary()) - assert res["code"] == 100 - def test_metadata_update_missing_kb_id(self, document_app_module, monkeypatch): module = document_app_module diff --git a/web/src/services/knowledge-service.ts b/web/src/services/knowledge-service.ts index d26f44da19..4831f6e1b3 100644 --- a/web/src/services/knowledge-service.ts +++ b/web/src/services/knowledge-service.ts @@ -263,7 +263,10 @@ export const getMetaDataService = ({ }: { kb_id: string; doc_ids?: string[]; -}) => request.post(api.getMetaData, { data: { kb_id, doc_ids } }); +}) => + request.get(api.getMetaData(kb_id), { + params: doc_ids?.length ? { doc_ids: doc_ids.join(',') } : undefined, + }); export const updateMetaData = ({ kb_id, doc_ids, diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts index 7edce6a205..64d60e59df 100644 --- a/web/src/utils/api.ts +++ b/web/src/utils/api.ts @@ -83,7 +83,8 @@ export default { unbindPipelineTask: ({ kb_id, type }: { kb_id: string; type: string }) => `${webAPI}/kb/unbind_task?kb_id=${kb_id}&pipeline_task_type=${type}`, pipelineRerun: `${webAPI}/canvas/rerun`, - getMetaData: `${webAPI}/document/metadata/summary`, + getMetaData: (datasetId: string) => + `${restAPIv1}/datasets/${datasetId}/metadata/summary`, updateMetaData: `${webAPI}/document/metadata/update`, kbUpdateMetaData: `${webAPI}/kb/update_metadata_setting`, documentUpdateMetaData: `${webAPI}/document/update_metadata_setting`,