Fix: enforce tenant authorization on document download endpoint (#14618) (#14625)

### What problem does this PR solve?

Closes #14618.

The `GET /v1/document/get/<doc_id>` endpoint in
`api/apps/document_app.py` was protected only by `@login_required` and
called `DocumentService.get_by_id(doc_id)` without verifying that the
document's knowledge base belonged to the requesting user's tenant. Any
authenticated user who knew (or guessed) a document ID could download
files belonging to any other tenant — a cross-tenant IDOR.

This PR adds a `DocumentService.accessible(doc_id, current_user.id)`
check before serving the file. The helper already exists and joins
`Document` → `Knowledgebase` → `UserTenant` to verify the requesting
user belongs to the tenant that owns the document's KB. The same pattern
is already used by `api/apps/restful_apis/document_api.py` and mirrors
the tenant scoping in the SDK route at `api/apps/sdk/doc.py`.

The check returns the existing `"Document not found!"` error for both
non-existent and inaccessible documents, so attackers cannot use the
response to enumerate valid doc IDs across tenants.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Other (please describe): Security fix (cross-tenant IDOR /
authorization bypass)
This commit is contained in:
web-dev0521
2026-05-08 02:24:03 -04:00
committed by GitHub
parent a82ae4a991
commit d51fb88573
2 changed files with 60 additions and 1 deletions

View File

@@ -343,6 +343,31 @@ class TestDocumentMetadataUnit:
def test_get_route_not_found_success_and_exception_unit(self, document_app_module, monkeypatch):
module = document_app_module
# Cross-tenant access is denied -> "Document not found!" (no ID enumeration).
# Stub get_by_id to a valid document so the test can only pass via the
# accessible() early return; if that check ever regresses, the route would
# proceed and the assertions below would no longer match.
accessible_calls = []
def fake_accessible_denied(doc_id, user_id):
accessible_calls.append((doc_id, user_id))
return False
monkeypatch.setattr(module.DocumentService, "accessible", fake_accessible_denied)
monkeypatch.setattr(
module.DocumentService,
"get_by_id",
lambda _doc_id: (True, SimpleNamespace(name="real.bin", type=module.FileType.OTHER.value)),
)
res = _run(module.get("doc1"))
assert res["code"] == RetCode.DATA_ERROR
assert "Document not found!" in res["message"]
assert accessible_calls == [("doc1", "user-1")]
# From here on the user is authorized; exercise the original branches.
monkeypatch.setattr(module.DocumentService, "accessible", lambda _doc_id, _user_id: True)
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None))
res = _run(module.get("doc1"))
assert res["code"] == RetCode.DATA_ERROR
@@ -380,6 +405,22 @@ class TestDocumentMetadataUnit:
module = document_app_module
monkeypatch.setattr(module, "request", _DummyRequest(args={"ext": "abc"}))
# Cross-tenant access is denied -> "Document not found!" (no ID enumeration).
accessible_calls = []
def fake_accessible_denied(doc_id, user_id):
accessible_calls.append((doc_id, user_id))
return False
monkeypatch.setattr(module.DocumentService, "accessible", fake_accessible_denied)
res = _run(module.download_attachment(attachment_id="att1"))
assert res["code"] == RetCode.DATA_ERROR
assert "Document not found!" in res["message"]
assert accessible_calls == [("att1", "user-1")]
# From here on the user is authorized; exercise the original branches.
monkeypatch.setattr(module.DocumentService, "accessible", lambda _doc_id, _user_id: True)
async def fake_thread_pool_exec(*_args, **_kwargs):
return b"attachment"