From 6932615852eb08c5d01f4ceb41e0c3a47b0d1a89 Mon Sep 17 00:00:00 2001 From: kpdev <156195510+kiannidev@users.noreply.github.com> Date: Wed, 20 May 2026 22:57:01 -0700 Subject: [PATCH] fix(api): infer /documents/{id}/download Content-Type from filename when ext is omitted (#15052) (#15053) ## Summary - Align **GET `/api/v1/documents//download`** with **`/preview`**: resolve extension and MIME type from the stored document name when the **`ext` query parameter is omitted**, instead of defaulting to `markdown`. - When **`?ext=`** is present, behavior stays the same as before (explicit extension / `Content-Type` mapping). - Enforce the same access + document lookup pattern as preview (**`accessible`** + **`get_by_id`**). - Extend unit tests for the no-`ext` PDF filename case. ## Test plan - [x] `uv run pytest test/testcases/test_web_api/test_document_app/test_document_metadata.py::TestDocumentMetadataUnit::test_download_attachment_success_and_exception_unit` - [x] Optional: `curl -sSI` against `/api/v1/documents//download` without `ext` and confirm `Content-Type: application/pdf` Fixes #15052. --- api/apps/restful_apis/document_api.py | 21 +++++++++++++++++-- .../test_document_metadata.py | 17 +++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 57215080d3..e816da2810 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -1881,10 +1881,27 @@ async def download_attachment(tenant_id=None, doc_id=None, attachment_id=None): # Keep backward compatibility with older callers and unit tests that still # pass `attachment_id` instead of the route parameter name. doc_id = doc_id or attachment_id - ext = request.args.get("ext", "markdown") + if not DocumentService.accessible(doc_id, current_user.id): + return get_data_error_result(message="Document not found!") + + e, doc = DocumentService.get_by_id(doc_id) + if not e: + return get_data_error_result(message="Document not found!") + + ext_arg = request.args.get("ext") + if ext_arg: + ext = ext_arg.lower().lstrip(".") + content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") + else: + m = re.search(r"\.([^.]+)$", doc.name.lower()) + ext = m.group(1) if m else None + content_type = None + if ext: + fallback_prefix = "image" if doc.type == FileType.VISUAL.value else "application" + content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}") + data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, doc_id) response = await make_response(data) - content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") apply_safe_file_response_headers(response, content_type, ext) return response diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 71bf32d565..0faa3283d1 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -420,6 +420,11 @@ class TestDocumentMetadataUnit: # From here on the user is authorized; exercise the original branches. monkeypatch.setattr(module.DocumentService, "accessible", lambda _doc_id, _user_id: True) + monkeypatch.setattr( + module.DocumentService, + "get_by_id", + lambda _doc_id: (True, SimpleNamespace(name="stub.bin", type=module.FileType.OTHER.value)), + ) async def fake_thread_pool_exec(*_args, **_kwargs): return b"attachment" @@ -441,6 +446,18 @@ class TestDocumentMetadataUnit: assert res.headers["content_type"] == "application/abc" assert res.headers["extension"] == "abc" + # No `ext` query param: infer MIME/extension from the stored document filename (aligned with /preview). + monkeypatch.setattr(module, "request", _DummyRequest(args={})) + monkeypatch.setattr( + module.DocumentService, + "get_by_id", + lambda _doc_id: (True, SimpleNamespace(name="Annual report.PDF", type=module.FileType.PDF.value)), + ) + res = _run(module.download_attachment(attachment_id="att1")) + assert isinstance(res, _DummyResponse) + assert res.headers["content_type"] == "application/pdf" + assert res.headers["extension"] == "pdf" + async def raise_error(*_args, **_kwargs): raise RuntimeError("download boom")