diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 9aa641ccfc..cf297c4b25 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -116,15 +116,30 @@ async def download_doc(document_id): if len(token) != 2: return get_error_data_result(message="Authorization is not valid!") token = token[1] + logging.info("Beta API token lookup attempted for document download") objs = APIToken.query(beta=token) if not objs: + logging.warning("Beta API token lookup failed for document download: invalid API key") return get_error_data_result(message='Authentication error: API key is invalid!"') + if len(objs) > 1: + logging.error("Beta API token lookup is ambiguous for document download: matches=%s", len(objs)) + return get_error_data_result(message="Authentication error: API key configuration is ambiguous.") + tenant_id = objs[0].tenant_id + logging.info("Beta API token authorized for document download: tenant_id=%s", tenant_id) if not document_id: return get_error_data_result(message="Specify document_id please.") doc = DocumentService.query(id=document_id) if not doc: return get_error_data_result(message=f"The dataset not own the document {document_id}.") + if not KnowledgebaseService.query(id=doc[0].kb_id, tenant_id=tenant_id): + logging.warning( + "cross-tenant access denied for document download: tenant_id=%s kb_id=%s document_id=%s", + tenant_id, + doc[0].kb_id, + document_id, + ) + return get_error_data_result(message="You do not have access to this document.") # The process of downloading doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location) diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py index 4a6d022c6f..ca440d4ae0 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py @@ -516,7 +516,11 @@ class TestDocRoutesUnit: res = _run(module.download_doc("doc-1")) assert "API key is invalid" in res["message"] - monkeypatch.setattr(module.APIToken, "query", lambda **_kwargs: [SimpleNamespace()]) + monkeypatch.setattr(module.APIToken, "query", lambda **_kwargs: [SimpleNamespace(tenant_id="tenant-1"), SimpleNamespace(tenant_id="tenant-2")]) + res = _run(module.download_doc("doc-1")) + assert "API key configuration is ambiguous" in res["message"] + + monkeypatch.setattr(module.APIToken, "query", lambda **_kwargs: [SimpleNamespace(tenant_id="tenant-1")]) res = _run(module.download_doc("")) assert res["message"] == "Specify document_id please." @@ -525,6 +529,23 @@ class TestDocRoutesUnit: assert "not own the document" in res["message"] monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [_DummyDoc()]) + kb_query_calls = [] + + def _deny_kb_query(**kwargs): + kb_query_calls.append(kwargs) + return [] + + monkeypatch.setattr(module.KnowledgebaseService, "query", _deny_kb_query) + monkeypatch.setattr( + module.File2DocumentService, + "get_storage_address", + lambda **_kwargs: (_ for _ in ()).throw(AssertionError("storage lookup must not run before tenant authorization")), + ) + res = _run(module.download_doc("doc-1")) + assert res["message"] == "You do not have access to this document." + assert kb_query_calls == [{"id": "kb-1", "tenant_id": "tenant-1"}] + + monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: [1]) monkeypatch.setattr(module.File2DocumentService, "get_storage_address", lambda **_kwargs: ("b", "n")) _patch_storage(monkeypatch, module, file_stream=b"") res = _run(module.download_doc("doc-1"))