fix(api): return 4xx not 500 when attachment blob is missing (#15509)

Guard the agent-attachment download against a missing or empty storage blob so the caller gets a structured 4xx (`Document not found!`) instead of an HTTP 500. Same bug class as #15365 on document preview.
Resolve #15502
This commit is contained in:
dripsmvcp
2026-06-15 00:41:49 -07:00
committed by GitHub
parent 0480dee83f
commit 53d4d9b3bd
2 changed files with 219 additions and 1 deletions

View File

@@ -2357,7 +2357,7 @@ async def download_attachment(tenant_id=None, attachment_id=None):
Mirrors the authorization model of the preview endpoint: the user must belong
to the tenant that owns the document's knowledge base. A denial returns the
same "Document not found!" response so the endpoint cannot be used to
same "Attachment not found!" response so the endpoint cannot be used to
enumerate doc ids across tenants.
"""
try:
@@ -2365,6 +2365,15 @@ async def download_attachment(tenant_id=None, attachment_id=None):
# pass `attachment_id` instead of the route parameter name.
ext = request.args.get("ext", "markdown")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
if not data:
# Storage object missing or empty (orphaned DB metadata, tenant
# mismatch). Without this guard `make_response(None)` raises
# `TypeError: response value cannot be None` and the caller
# sees HTTP 500 — same bug class as #15365 on document
# preview. Return the same "Attachment not found!" shape used
# by the preview route's missing-record path so byte-streaming
# endpoints respond consistently on a not-found.
return get_data_error_result(message="Attachment not found!")
response = await make_response(data)
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
apply_safe_file_response_headers(response, content_type, ext)

View File

@@ -0,0 +1,209 @@
#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Regression test for `/api/v1/agents/attachments/<attachment_id>/download` (#15502).
Without the empty-blob guard, `make_response(None)` raises `TypeError`
and the handler's blanket `except Exception` converts it to HTTP 500.
Same bug class as #15365 on document preview. This test pins the
missing-storage path to a structured 4xx response.
"""
import asyncio
import importlib.util
import sys
from pathlib import Path
from types import ModuleType, SimpleNamespace
import pytest
class _PassthroughManager:
def route(self, *_args, **_kwargs):
return lambda func: func
class _LenientModule(ModuleType):
"""A stub module that yields a harmless placeholder for any attribute that
wasn't explicitly provided. agent_api.py's top-level `from <mod> import a, b`
only needs every imported name to exist; symbols not on the
download_attachment path are never called, so a no-op placeholder is safe.
This keeps the test from rotting each time agent_api.py grows an import.
"""
def __getattr__(self, _name):
return lambda *_a, **_k: None
def _stub(monkeypatch, name, **attrs):
mod = _LenientModule(name)
for key, value in attrs.items():
setattr(mod, key, value)
monkeypatch.setitem(sys.modules, name, mod)
return mod
def _load_agent_api(monkeypatch, *, storage_get):
"""Load agent_api.py with the minimum stubs to exercise download_attachment."""
async def _make_response(payload):
if payload is None:
raise TypeError("response value cannot be None")
return SimpleNamespace(payload=payload, headers={})
_stub(
monkeypatch, "api.apps",
current_user=SimpleNamespace(id="tenant-1"),
login_required=lambda func: func,
)
_stub(monkeypatch, "api.apps.services.canvas_replica_service", CanvasReplicaService=SimpleNamespace())
_stub(monkeypatch, "api.db", CanvasCategory=SimpleNamespace())
_stub(monkeypatch, "api.db.db_models", Task=SimpleNamespace())
_stub(
monkeypatch, "api.db.services.api_service",
API4ConversationService=SimpleNamespace(
get_by_id=lambda _id: (False, None),
save=lambda **_k: True,
delete_by_id=lambda *_a, **_k: True,
query=lambda **_k: [],
),
)
_stub(
monkeypatch, "api.db.services.canvas_service",
CanvasTemplateService=SimpleNamespace(),
UserCanvasService=SimpleNamespace(accessible=lambda *_a, **_k: True, query=lambda **_k: []),
completion=lambda *_a, **_k: None,
completion_openai=lambda *_a, **_k: None,
)
_stub(monkeypatch, "api.db.services.document_service", DocumentService=SimpleNamespace())
_stub(monkeypatch, "api.db.services.file_service", FileService=SimpleNamespace())
_stub(monkeypatch, "api.db.services.knowledgebase_service", KnowledgebaseService=SimpleNamespace())
_stub(monkeypatch, "api.db.services.pipeline_operation_log_service", PipelineOperationLogService=SimpleNamespace())
_stub(
monkeypatch, "api.db.services.task_service",
CANVAS_DEBUG_DOC_ID="", TaskService=SimpleNamespace(), queue_dataflow=lambda *_a, **_k: None,
)
_stub(
monkeypatch, "api.db.services.user_service",
TenantService=SimpleNamespace(), UserService=SimpleNamespace(get_by_id=lambda *_a, **_k: (False, None)),
)
_stub(monkeypatch, "api.db.services.user_canvas_version", UserCanvasVersionService=SimpleNamespace())
_stub(
monkeypatch, "api.utils.api_utils",
construct_json_result=lambda **kw: {"kind": "json", **kw},
get_data_error_result=lambda message="", code=0, data=False: {"kind": "data_error", "message": message},
get_error_data_result=lambda *_a, **_k: {"kind": "error"},
get_result=lambda *_a, **_k: {"kind": "result"},
get_json_result=lambda *_a, **_k: {"kind": "json_result"},
server_error_response=lambda e: {"kind": "server_error", "error": str(e)},
add_tenant_id_to_kwargs=lambda func: func,
get_request_json=lambda: {},
# Used as `@validate_request(...)` decorator factory at module level, so it
# must return an identity decorator (the lenient fallback would return None
# and `@None` raises TypeError during import).
validate_request=lambda *_a, **_k: (lambda func: func),
)
_stub(
monkeypatch, "common.settings",
retriever=SimpleNamespace(), kg_retriever=SimpleNamespace(),
# download_attachment reads settings.STORAGE_IMPL.get after
# `from common import settings` rebinds the module's `settings` name.
STORAGE_IMPL=SimpleNamespace(get=storage_get),
)
_stub(monkeypatch, "common.ssrf_guard", assert_host_is_safe=lambda *_a, **_k: None)
_stub(monkeypatch, "common.constants", RetCode=SimpleNamespace())
_stub(monkeypatch, "api.utils.pagination_utils", validate_rest_api_page_size=lambda *_a, **_k: None)
_stub(monkeypatch, "peewee")
async def _thread_pool_exec(fn, *args, **kwargs):
# download_attachment does `await thread_pool_exec(STORAGE_IMPL.get, ...)`;
# run the callable inline so the storage stub's return value flows through.
return fn(*args, **kwargs)
_stub(monkeypatch, "common.misc_utils", get_uuid=lambda: "uuid", thread_pool_exec=_thread_pool_exec)
_stub(
monkeypatch, "api.utils.web_utils",
CONTENT_TYPE_MAP={"markdown": "text/markdown"},
apply_safe_file_response_headers=lambda *_a, **_k: None,
)
# Lenient: agent_api.py imports Response, jsonify, request, make_response from
# quart; only request and make_response are on the download path, the rest
# resolve to placeholders so the import line can't break the test.
quart_stub = _LenientModule("quart")
quart_stub.request = SimpleNamespace(method="GET", args={"ext": "markdown"})
quart_stub.make_response = _make_response
monkeypatch.setitem(sys.modules, "quart", quart_stub)
# parents[5] = repo root from test/unit_test/api/apps/restful_apis/<file>
repo_root = Path(__file__).resolve().parents[5]
module_path = repo_root / "api" / "apps" / "restful_apis" / "agent_api.py"
spec = importlib.util.spec_from_file_location("test_agent_api_module", module_path)
module = importlib.util.module_from_spec(spec)
# `manager` must exist before exec so the @manager.route decorators run.
module.manager = _PassthroughManager()
monkeypatch.setitem(sys.modules, "test_agent_api_module", module)
spec.loader.exec_module(module)
# Pin every module global that download_attachment resolves at call time.
# The sys.modules stubs above only guarantee the import *succeeds*. In the
# full test environment `from common import settings` (and the api_utils /
# misc_utils imports) can bind the REAL modules instead of our stubs — e.g.
# if another test already imported common.settings, it is an attribute on the
# already-loaded `common` package and the sys.modules stub is bypassed — so
# settings.STORAGE_IMPL would be the uninitialised real None and the handler
# would 500 with "'NoneType' object has no attribute 'get'". Overriding the
# globals on the loaded module makes behaviour identical in both the
# bare-stub (local) and full-dependency (CI) environments.
module.settings = SimpleNamespace(STORAGE_IMPL=SimpleNamespace(get=storage_get))
module.request = SimpleNamespace(method="GET", args={"ext": "markdown"})
module.make_response = _make_response
module.thread_pool_exec = _thread_pool_exec
module.get_data_error_result = lambda message="", **_k: {"kind": "data_error", "message": message}
module.server_error_response = lambda e: {"kind": "server_error", "error": str(e)}
module.CONTENT_TYPE_MAP = {"markdown": "text/markdown"}
module.apply_safe_file_response_headers = lambda *_a, **_k: None
return module
@pytest.mark.p1
class TestAttachmentDownloadMissingBlob:
"""Regression for #15502: missing-blob → structured 4xx, not HTTP 500."""
def test_empty_blob_returns_not_found(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""Storage returns None (orphaned metadata) → 'Document not found!' 4xx,
not a TypeError 500 from make_response(None)."""
module = _load_agent_api(monkeypatch, storage_get=lambda *_a, **_k: None)
result = asyncio.run(module.download_attachment(tenant_id="t1", attachment_id="orphan"))
# 'data_error' is the get_data_error_result shape from our stub above.
# If the empty-blob guard is missing, make_response(None) raises and
# the result instead has `kind == "server_error"`.
assert isinstance(result, dict) and result.get("kind") == "data_error", result
assert "not found" in result["message"].lower()
def test_empty_bytes_returns_not_found(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""Same path when storage returns b'' instead of None."""
module = _load_agent_api(monkeypatch, storage_get=lambda *_a, **_k: b"")
result = asyncio.run(module.download_attachment(tenant_id="t1", attachment_id="empty"))
assert isinstance(result, dict) and result.get("kind") == "data_error"
assert "not found" in result["message"].lower()
def test_nonempty_blob_proceeds(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""Happy path: real bytes flow through make_response untouched."""
module = _load_agent_api(monkeypatch, storage_get=lambda *_a, **_k: b"PDFDATA")
result = asyncio.run(module.download_attachment(tenant_id="t1", attachment_id="good"))
# SimpleNamespace from our _make_response stub
assert hasattr(result, "payload") and result.payload == b"PDFDATA"