From d51fb88573861e68f40d795be93101f25567b8dd Mon Sep 17 00:00:00 2001 From: web-dev0521 Date: Fri, 8 May 2026 02:24:03 -0400 Subject: [PATCH] Fix: enforce tenant authorization on document download endpoint (#14618) (#14625) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Closes #14618. The `GET /v1/document/get/` endpoint in `api/apps/document_app.py` was protected only by `@login_required` and called `DocumentService.get_by_id(doc_id)` without verifying that the document's knowledge base belonged to the requesting user's tenant. Any authenticated user who knew (or guessed) a document ID could download files belonging to any other tenant — a cross-tenant IDOR. This PR adds a `DocumentService.accessible(doc_id, current_user.id)` check before serving the file. The helper already exists and joins `Document` → `Knowledgebase` → `UserTenant` to verify the requesting user belongs to the tenant that owns the document's KB. The same pattern is already used by `api/apps/restful_apis/document_api.py` and mirrors the tenant scoping in the SDK route at `api/apps/sdk/doc.py`. The check returns the existing `"Document not found!"` error for both non-existent and inaccessible documents, so attackers cannot use the response to enumerate valid doc IDs across tenants. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] Other (please describe): Security fix (cross-tenant IDOR / authorization bypass) --- api/apps/restful_apis/document_api.py | 20 ++++++++- .../test_document_metadata.py | 41 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index f57fe6b8ae..a4d68c2e00 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -23,7 +23,7 @@ from quart import request, make_response from peewee import OperationalError from pydantic import ValidationError -from api.apps import login_required +from api.apps import current_user, login_required from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX from api.apps.services.document_api_service import validate_document_update_fields, map_doc_keys, \ map_doc_keys_with_run_status, update_document_name_only, update_chunk_method, update_document_status_only, \ @@ -1859,7 +1859,16 @@ async def batch_update_document_status(tenant_id, dataset_id): @manager.route("/documents//preview", methods=["GET"]) # noqa: F821 @login_required async def get(doc_id): + """Return the raw file bytes for a document the requesting user is authorized to read. + + The user must belong to the tenant that owns the document's knowledge base; otherwise + the response is indistinguishable from a missing document to avoid cross-tenant ID + enumeration. + """ try: + if not DocumentService.accessible(doc_id, current_user.id): + return get_data_error_result(message="Document not found!") + e, doc = DocumentService.get_by_id(doc_id) if not e: return get_data_error_result(message="Document not found!") @@ -1884,10 +1893,19 @@ async def get(doc_id): @login_required @add_tenant_id_to_kwargs async def download_attachment(tenant_id=None, doc_id=None, attachment_id=None): + """Stream a document's underlying file to the requesting user. + + Mirrors the authorization model of the preview endpoint: the user must belong + to the tenant that owns the document's knowledge base. A denial returns the + same "Document not found!" response so the endpoint cannot be used to + enumerate doc ids across tenants. + """ try: # Keep backward compatibility with older callers and unit tests that still # pass `attachment_id` instead of the route parameter name. doc_id = doc_id or attachment_id + if not DocumentService.accessible(doc_id, current_user.id): + return get_data_error_result(message="Document not found!") ext = request.args.get("ext", "markdown") data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, doc_id) response = await make_response(data) diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 5a843cdc3a..71bf32d565 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -343,6 +343,31 @@ class TestDocumentMetadataUnit: def test_get_route_not_found_success_and_exception_unit(self, document_app_module, monkeypatch): module = document_app_module + + # Cross-tenant access is denied -> "Document not found!" (no ID enumeration). + # Stub get_by_id to a valid document so the test can only pass via the + # accessible() early return; if that check ever regresses, the route would + # proceed and the assertions below would no longer match. + accessible_calls = [] + + def fake_accessible_denied(doc_id, user_id): + accessible_calls.append((doc_id, user_id)) + return False + + monkeypatch.setattr(module.DocumentService, "accessible", fake_accessible_denied) + monkeypatch.setattr( + module.DocumentService, + "get_by_id", + lambda _doc_id: (True, SimpleNamespace(name="real.bin", type=module.FileType.OTHER.value)), + ) + res = _run(module.get("doc1")) + assert res["code"] == RetCode.DATA_ERROR + assert "Document not found!" in res["message"] + assert accessible_calls == [("doc1", "user-1")] + + # From here on the user is authorized; exercise the original branches. + monkeypatch.setattr(module.DocumentService, "accessible", lambda _doc_id, _user_id: True) + monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None)) res = _run(module.get("doc1")) assert res["code"] == RetCode.DATA_ERROR @@ -380,6 +405,22 @@ class TestDocumentMetadataUnit: module = document_app_module monkeypatch.setattr(module, "request", _DummyRequest(args={"ext": "abc"})) + # Cross-tenant access is denied -> "Document not found!" (no ID enumeration). + accessible_calls = [] + + def fake_accessible_denied(doc_id, user_id): + accessible_calls.append((doc_id, user_id)) + return False + + monkeypatch.setattr(module.DocumentService, "accessible", fake_accessible_denied) + res = _run(module.download_attachment(attachment_id="att1")) + assert res["code"] == RetCode.DATA_ERROR + assert "Document not found!" in res["message"] + assert accessible_calls == [("att1", "user-1")] + + # From here on the user is authorized; exercise the original branches. + monkeypatch.setattr(module.DocumentService, "accessible", lambda _doc_id, _user_id: True) + async def fake_thread_pool_exec(*_args, **_kwargs): return b"attachment"