fix(agent): enable MCP file preview via doc_id (#15399)

## Summary
MCP-wrapped agents could only force-download files looked up by
`doc_id`. This adds an explicit preview path and inline response headers
for previewable file types.

- **New** `GET /api/v1/agents/attachments/{attachment_id}/preview` —
inline preview for PDFs, images, and other safe types (pass `ext` and/or
`mime_type`)
- **Improved** `GET /api/v1/documents/{doc_id}/preview` — sets inline
disposition using the document filename
- **Improved** attachment download routing — resolves `mime_type` /
`ext` query params (no default `markdown`), supports
`disposition=inline`
- **DocGenerator output** — includes URL-encoded `preview_url` for MCP
clients
- **Legacy `/document/download/...` aliases** — still use download
semantics; MCP clients should call `/preview` explicitly

Fixes #15398

## Test plan
- [x] `pytest test/unit_test/api/utils/test_file_response_headers.py`
(6/6)

---------

Co-authored-by: MkDev11 <mkdev11@users.noreply.github.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
Co-authored-by: Ling Qin <qinling0210@163.com>
This commit is contained in:
monsterDavid
2026-07-03 04:56:01 -07:00
committed by GitHub
parent 0f4f2135f3
commit 7da4f200e5
8 changed files with 295 additions and 106 deletions

View File

@@ -13,6 +13,7 @@ from xml.sax.saxutils import escape
from agent.component.base import ComponentParamBase
from api.utils.api_utils import timeout
from api.utils.file_response import agent_attachment_preview_path
from common import settings
from common.misc_utils import get_uuid
from .message import Message
@@ -136,6 +137,7 @@ class DocGenerator(Message, ABC):
"mime_type": mime_type,
"size": file_size,
"base64": file_base64,
"preview_url": agent_attachment_preview_path(doc_id, ext=output_format, mime_type=mime_type),
"include_download_info_in_content": self._param.include_download_info_in_content,
}
self.set_output("doc_id", doc_id)

View File

@@ -27,7 +27,11 @@ import time
from functools import partial, wraps
from typing import Set
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
from api.utils.file_response import (
apply_download_file_response_headers,
apply_preview_file_response_headers,
resolve_attachment_content_type,
)
import jwt
from quart import Response, jsonify, request, make_response
@@ -2463,37 +2467,51 @@ async def webhook_trace(agent_id: str):
}
)
def _attachment_request_metadata():
    """Derive attachment response metadata from the request's query string.

    Reads the optional ``ext``, ``mime_type`` and ``filename`` query
    parameters and resolves the first two with
    ``resolve_attachment_content_type``.

    Returns:
        A ``(content_type, ext, filename)`` tuple. ``content_type`` falls back
        to ``"application/octet-stream"`` and ``ext`` to ``"bin"`` when they
        cannot be resolved; ``filename`` is the raw query value or ``None``.
    """
    ext = request.args.get("ext")
    mime_type = request.args.get("mime_type")
    filename = request.args.get("filename")

    content_type, resolved_ext = resolve_attachment_content_type(ext, mime_type)
    if not content_type:
        content_type = "application/octet-stream"
    if not resolved_ext:
        # Normalise the raw value here and fall back to "bin" whenever nothing
        # is left (e.g. ``ext="."``), so callers never receive an empty string.
        normalized = ext.lower().strip(".") if isinstance(ext, str) else ""
        resolved_ext = normalized or "bin"
    return content_type, resolved_ext, filename
async def _stream_agent_attachment(tenant_id, attachment_id, *, inline: bool):
    """Fetch an agent attachment from storage and wrap it in a file response.

    Args:
        tenant_id: First argument handed to ``settings.STORAGE_IMPL.get``.
        attachment_id: Second argument handed to ``settings.STORAGE_IMPL.get``;
            when falsy it is taken from the request's view arguments.
        inline: Apply the preview headers when true, the download headers
            otherwise.

    Returns:
        The response built from the stored bytes, or the result of
        ``get_data_error_result`` when storage returns nothing.
    """
    attachment_id = attachment_id or request.view_args.get("attachment_id")
    content_type, ext, filename = _attachment_request_metadata()
    data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
    if not data:
        # Bail out before ``make_response``: a missing/empty storage object
        # must be reported as not-found. The wording stays "Attachment not
        # found!", which is what the attachment endpoints returned before.
        return get_data_error_result(message="Attachment not found!")

    response = await make_response(data)
    apply_headers = apply_preview_file_response_headers if inline else apply_download_file_response_headers
    apply_headers(response, content_type, ext, filename)
    return response
@manager.route("/agents/attachments/<attachment_id>/preview", methods=["GET"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def preview_attachment(tenant_id=None, attachment_id=None):
    """Stream an agent-generated attachment for inline preview in MCP clients.

    Delegates to ``_stream_agent_attachment`` with ``inline=True``; any
    exception is converted with ``server_error_response``.
    """
    try:
        streamed = await _stream_agent_attachment(tenant_id, attachment_id, inline=True)
    except Exception as e:
        return server_error_response(e)
    return streamed
@manager.route("/agents/attachments/<attachment_id>/download", methods=["GET"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def download_attachment(tenant_id=None, attachment_id=None):
    """Stream an agent-generated attachment as a download.

    The actual work is done by ``_stream_agent_attachment``. Passing the query
    parameter ``disposition=inline`` (case-insensitive) selects the preview
    headers instead of the download headers. Any exception is converted with
    ``server_error_response``.
    """
    # ``attachment_id`` stays an optional keyword so that older callers and
    # unit tests which pass it explicitly keep working.
    try:
        wants_inline = request.args.get("disposition", "").lower() == "inline"
        return await _stream_agent_attachment(tenant_id, attachment_id, inline=wants_inline)
    except Exception as e:
        return server_error_response(e)

View File

@@ -72,6 +72,7 @@ from common.constants import ParserType, RetCode, TaskStatus, SANDBOX_ARTIFACT_B
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
from common.misc_utils import get_uuid, thread_pool_exec
from api.utils.file_utils import filename_type, thumbnail
from api.utils.file_response import apply_preview_file_response_headers
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url, apply_safe_file_response_headers
from common.ssrf_guard import assert_url_is_safe
from rag.nlp import search
@@ -2088,7 +2089,7 @@ async def get(doc_id):
if ext:
fallback_prefix = "image" if doc.type == FileType.VISUAL.value else "application"
content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
apply_safe_file_response_headers(response, content_type, ext)
apply_preview_file_response_headers(response, content_type, ext, doc.name)
return response
except Exception as e:
return server_error_response(e)

147
api/utils/file_response.py Normal file
View File

@@ -0,0 +1,147 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
import re
from urllib.parse import urlencode
# Maps a lowercase file extension (without the leading dot) to the
# Content-Type value used when serving a file with that extension.
CONTENT_TYPE_MAP = {
    # Office
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "doc": "application/msword",
    "pdf": "application/pdf",
    "csv": "text/csv",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    # Text/code
    "txt": "text/plain",
    "py": "text/plain",
    "js": "text/plain",
    "java": "text/plain",
    "c": "text/plain",
    "cpp": "text/plain",
    "h": "text/plain",
    "php": "text/plain",
    "go": "text/plain",
    "ts": "text/plain",
    "sh": "text/plain",
    "cs": "text/plain",
    "kt": "text/plain",
    "sql": "text/plain",
    # Web
    "md": "text/markdown",
    "markdown": "text/markdown",
    "mdx": "text/markdown",
    "htm": "text/html",
    "html": "text/html",
    "json": "application/json",
    # Image formats
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    "ico": "image/x-icon",
    "avif": "image/avif",
    "heic": "image/heic",
    # PowerPoint
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}

# Extensions for which ``should_force_attachment`` returns True.
# NOTE(review): presumably formats a browser could render as active
# content when shown inline — confirm the intended threat model.
FORCE_ATTACHMENT_EXTENSIONS = {
    "htm",
    "html",
    "shtml",
    "xht",
    "xhtml",
    "xml",
    "mhtml",
    "svg",
}

# Content types (parameters stripped, lowercase) for which
# ``should_force_attachment`` returns True.
FORCE_ATTACHMENT_CONTENT_TYPES = {
    "text/html",
    "image/svg+xml",
    "application/xhtml+xml",
    "text/xml",
    "application/xml",
    "multipart/related",
}
def should_force_attachment(ext: str | None, content_type: str | None = None) -> bool:
    """Tell whether a file must be served as an attachment instead of inline.

    Args:
        ext: File extension; case and surrounding dots are ignored.
        content_type: Content type; case, parameters after ``;`` and
            surrounding whitespace are ignored.

    Returns:
        True when the extension is in ``FORCE_ATTACHMENT_EXTENSIONS`` or the
        content type is in ``FORCE_ATTACHMENT_CONTENT_TYPES``.
    """
    bare_ext = (ext or "").lower().strip(".")
    # Only the media type itself is compared, e.g. "text/html; charset=utf-8"
    # is matched as "text/html".
    media_type = (content_type or "").lower().partition(";")[0].strip()
    return bare_ext in FORCE_ATTACHMENT_EXTENSIONS or media_type in FORCE_ATTACHMENT_CONTENT_TYPES
def sanitize_content_disposition_filename(filename: str | None) -> str | None:
if not filename:
return None
base = re.sub(r"[^\w.\-]", "_", str(filename).split("/")[-1].split("\\")[-1])
return base or None
def resolve_attachment_content_type(ext: str | None = None, mime_type: str | None = None) -> tuple[str | None, str | None]:
if mime_type:
normalized_type = mime_type.lower().split(";")[0].strip()
for known_ext, known_type in CONTENT_TYPE_MAP.items():
if known_type == normalized_type:
return normalized_type, known_ext
return normalized_type, (ext or "").lower().strip(".") or None
if ext:
normalized_ext = ext.lower().strip(".")
return CONTENT_TYPE_MAP.get(normalized_ext, f"application/{normalized_ext}"), normalized_ext
return None, None
def apply_preview_file_response_headers(
    response,
    content_type: str | None,
    ext: str | None = None,
    filename: str | None = None,
):
    """Set the headers for serving a file as an inline preview.

    Args:
        response: Object whose ``headers`` attribute offers ``set(key, value)``.
        content_type: Value for ``Content-Type``; the header is left untouched
            when this is falsy.
        ext: File extension, consulted by ``should_force_attachment``.
        filename: Optional name; it is passed through
            ``sanitize_content_disposition_filename`` before being used.

    Returns:
        The same ``response`` object. ``Content-Disposition`` is a bare
        ``attachment`` when ``should_force_attachment`` says so, otherwise
        ``inline`` (with the sanitised filename when one is available).
    """
    if content_type:
        response.headers.set("Content-Type", content_type)
    # Forbid MIME sniffing on every preview, not only on forced attachments:
    # the content type may come from the caller, and a value the browser
    # cannot parse would otherwise be sniffed and could render inline as HTML.
    response.headers.set("X-Content-Type-Options", "nosniff")

    if should_force_attachment(ext, content_type):
        response.headers.set("Content-Disposition", "attachment")
        return response

    safe_filename = sanitize_content_disposition_filename(filename)
    disposition = f'inline; filename="{safe_filename}"' if safe_filename else "inline"
    response.headers.set("Content-Disposition", disposition)
    return response
def apply_download_file_response_headers(
    response,
    content_type: str | None,
    ext: str | None = None,
    filename: str | None = None,
):
    """Set the headers for serving a file as a download.

    Args:
        response: Object whose ``headers`` attribute offers ``set(key, value)``.
        content_type: Value for ``Content-Type``; skipped when falsy.
        ext: File extension, consulted by ``should_force_attachment``.
        filename: Optional name, sanitised before use.

    Returns:
        The same ``response`` object. ``Content-Disposition`` is always
        ``attachment``; the sanitised filename is appended only when
        ``should_force_attachment`` is false, and ``nosniff`` is added only
        when it is true.
    """
    if content_type:
        response.headers.set("Content-Type", content_type)

    forced = should_force_attachment(ext, content_type)
    if forced:
        response.headers.set("X-Content-Type-Options", "nosniff")

    # A forced attachment never carries a filename, so skip sanitising then.
    safe_filename = None if forced else sanitize_content_disposition_filename(filename)
    disposition = f'attachment; filename="{safe_filename}"' if safe_filename else "attachment"
    response.headers.set("Content-Disposition", disposition)
    return response
def agent_attachment_preview_path(attachment_id: str, *, ext: str | None = None, mime_type: str | None = None) -> str:
query: dict[str, str] = {}
if ext:
query["ext"] = ext
if mime_type:
query["mime_type"] = mime_type
suffix = f"?{urlencode(query)}" if query else ""
return f"/api/v1/agents/attachments/{attachment_id}/preview{suffix}"

View File

@@ -40,83 +40,17 @@ ATTEMPT_LOCK_SECONDS = 30 * 60 # lock for 30 minutes
RESEND_COOLDOWN_SECONDS = 60 # cooldown for 1 minute
# Maps a lowercase file extension (without the leading dot) to the
# Content-Type value used when serving a file with that extension.
# NOTE(review): CONTENT_TYPE_MAP, FORCE_ATTACHMENT_EXTENSIONS and
# FORCE_ATTACHMENT_CONTENT_TYPES are also imported from
# api.utils.file_response further down, which rebinds these names —
# confirm whether the local definitions are still needed.
CONTENT_TYPE_MAP = {
    # Office
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "doc": "application/msword",
    "pdf": "application/pdf",
    "csv": "text/csv",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    # Text/code
    "txt": "text/plain",
    "py": "text/plain",
    "js": "text/plain",
    "java": "text/plain",
    "c": "text/plain",
    "cpp": "text/plain",
    "h": "text/plain",
    "php": "text/plain",
    "go": "text/plain",
    "ts": "text/plain",
    "sh": "text/plain",
    "cs": "text/plain",
    "kt": "text/plain",
    "sql": "text/plain",
    # Web
    "md": "text/markdown",
    "markdown": "text/markdown",
    "mdx": "text/markdown",
    "htm": "text/html",
    "html": "text/html",
    "json": "application/json",
    # Image formats
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    "ico": "image/x-icon",
    "avif": "image/avif",
    "heic": "image/heic",
    # PPTX
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}

# Extensions for which ``should_force_attachment`` returns True.
FORCE_ATTACHMENT_EXTENSIONS = {
    "htm",
    "html",
    "shtml",
    "xht",
    "xhtml",
    "xml",
    "mhtml",
    "svg",
}

# Content types for which ``should_force_attachment`` returns True.
FORCE_ATTACHMENT_CONTENT_TYPES = {
    "text/html",
    "image/svg+xml",
    "application/xhtml+xml",
    "text/xml",
    "application/xml",
    "multipart/related",
}
def should_force_attachment(ext: str | None, content_type: str | None = None) -> bool:
    """Tell whether a file must be served as an attachment instead of inline.

    Args:
        ext: File extension; case and surrounding dots are ignored.
        content_type: Content type; case, parameters after ``;`` and
            surrounding whitespace are ignored.

    Returns:
        True when the extension is in ``FORCE_ATTACHMENT_EXTENSIONS`` or the
        content type is in ``FORCE_ATTACHMENT_CONTENT_TYPES``.
    """
    normalized_ext = (ext or "").lower().strip(".")
    if normalized_ext in FORCE_ATTACHMENT_EXTENSIONS:
        return True
    # Drop MIME parameters so that "text/html; charset=utf-8" still matches
    # the bare "text/html" entry.
    normalized_type = (content_type or "").lower().split(";")[0].strip()
    return normalized_type in FORCE_ATTACHMENT_CONTENT_TYPES
from api.utils.file_response import ( # noqa: F401
CONTENT_TYPE_MAP,
FORCE_ATTACHMENT_CONTENT_TYPES,
FORCE_ATTACHMENT_EXTENSIONS,
agent_attachment_preview_path,
apply_download_file_response_headers,
apply_preview_file_response_headers,
resolve_attachment_content_type,
sanitize_content_disposition_filename,
should_force_attachment,
)
def apply_safe_file_response_headers(response, content_type: str | None, ext: str | None = None):

View File

@@ -47,6 +47,11 @@ class _LenientModule(ModuleType):
return lambda *_a, **_k: None
class _Headers(dict):
def set(self, key, value):
self[key] = value
def _stub(monkeypatch, name, **attrs):
mod = _LenientModule(name)
for key, value in attrs.items():
@@ -61,7 +66,7 @@ def _load_agent_api(monkeypatch, *, storage_get):
async def _make_response(payload):
    """Test double for ``make_response``.

    Raises:
        TypeError: when *payload* is ``None``.

    Returns:
        A ``SimpleNamespace`` carrying ``payload`` and an empty ``_Headers``.
    """
    if payload is None:
        raise TypeError("response value cannot be None")
    # The headers object must offer ``.set(...)``; a plain dict does not, so
    # the earlier ``headers={}`` return was dropped in favour of ``_Headers``.
    return SimpleNamespace(payload=payload, headers=_Headers())
_stub(
monkeypatch,

View File

@@ -0,0 +1,64 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
from urllib.parse import urlencode
import pytest
from api.utils import file_response as module
class _DummyHeaders(dict):
def set(self, key, value):
self[key] = value
class _DummyResponse:
    """Minimal response object exposing only a ``headers`` attribute."""

    def __init__(self) -> None:
        fresh_headers = _DummyHeaders()
        self.headers = fresh_headers
@pytest.mark.p2
def test_apply_preview_sets_inline_for_pdf():
    """A PDF preview keeps its content type and is inline with its filename."""
    resp = _DummyResponse()
    module.apply_preview_file_response_headers(resp, "application/pdf", "pdf", "report.pdf")
    headers = resp.headers
    assert headers["Content-Type"] == "application/pdf"
    assert headers["Content-Disposition"] == 'inline; filename="report.pdf"'
@pytest.mark.p2
def test_apply_preview_forces_attachment_for_html():
    """An HTML preview is downgraded to a bare attachment with nosniff."""
    resp = _DummyResponse()
    module.apply_preview_file_response_headers(resp, "text/html", "html", "page.html")
    headers = resp.headers
    assert headers["Content-Disposition"] == "attachment"
    assert headers["X-Content-Type-Options"] == "nosniff"
@pytest.mark.p2
def test_apply_download_sets_attachment_for_pdf():
    """A PDF download is an attachment that carries its filename."""
    resp = _DummyResponse()
    module.apply_download_file_response_headers(resp, "application/pdf", "pdf", "report.pdf")
    disposition = resp.headers["Content-Disposition"]
    assert disposition == 'attachment; filename="report.pdf"'
@pytest.mark.p2
def test_resolve_attachment_content_type_prefers_mime_type():
    """A known MIME type wins over a conflicting extension hint."""
    resolved_type, resolved_ext = module.resolve_attachment_content_type(
        ext="md",
        mime_type="application/pdf",
    )
    assert resolved_type == "application/pdf"
    assert resolved_ext == "pdf"
@pytest.mark.p2
def test_agent_attachment_preview_path_includes_query():
    """Both hints appear in the query string of the generated path."""
    expected_query = urlencode({"ext": "pdf", "mime_type": "application/pdf"})
    expected_path = f"/api/v1/agents/attachments/doc-1/preview?{expected_query}"
    actual_path = module.agent_attachment_preview_path("doc-1", ext="pdf", mime_type="application/pdf")
    assert actual_path == expected_path
@pytest.mark.p2
def test_agent_attachment_preview_path_encodes_svg_mime_type():
    """The ``+`` of ``image/svg+xml`` must be percent-encoded in the path."""
    path = module.agent_attachment_preview_path("doc-1", ext="svg", mime_type="image/svg+xml")
    query = urlencode({"ext": "svg", "mime_type": "image/svg+xml"})
    assert path == f"/api/v1/agents/attachments/doc-1/preview?{query}"
    # The former check `"%2B" in path or "svg%2Bxml" in path` was a redundant
    # disjunction; assert the precise encoded form and that no raw "+" is left.
    assert "svg%2Bxml" in path
    assert "+" not in path

View File

@@ -280,6 +280,24 @@ export default {
cancelDataflow: (id: string) => `${restAPIv1}/tasks/${id}/cancel`,
getAttachmentFileDownload: (docId: string) =>
`${restAPIv1}/agents/attachments/${docId}/download`,
/**
 * Build the preview URL for an agent attachment.
 * Only truthy hints are sent, as `ext`, `mime_type` and `filename`.
 */
getAttachmentFilePreview: ({
  docId,
  ext,
  mimeType,
  filename,
}: {
  docId: string;
  ext?: string;
  mimeType?: string;
  filename?: string;
}) => {
  const hints: Array<[string, string | undefined]> = [
    ['ext', ext],
    ['mime_type', mimeType],
    ['filename', filename],
  ];
  const params = new URLSearchParams();
  for (const [key, value] of hints) {
    if (value) {
      params.set(key, value);
    }
  }
  const query = params.toString();
  const base = `${restAPIv1}/agents/attachments/${docId}/preview`;
  return query ? `${base}?${query}` : base;
},
downloadFile: `${restAPIv1}/agents/download`,
testWebhook: (id: string) => `${restAPIv1}/agents/${id}/webhook/test`,
fetchWebhookTrace: (id: string) => `${restAPIv1}/agents/${id}/webhook/logs`,