mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-06 03:18:36 +08:00
fix(agent): enable MCP file preview via doc_id (#15399)
## Summary
MCP-wrapped agents could only force-download files looked up by
`doc_id`. This adds an explicit preview path and inline response headers
for previewable file types.
- **New** `GET /api/v1/agents/attachments/{attachment_id}/preview` —
inline preview for PDFs, images, and other safe types (pass `ext` and/or
`mime_type`)
- **Improved** `GET /api/v1/documents/{doc_id}/preview` — sets inline
disposition using the document filename
- **Improved** attachment download routing — resolves `mime_type` /
`ext` query params (no default `markdown`), supports
`disposition=inline`
- **DocGenerator output** — includes URL-encoded `preview_url` for MCP
clients
- **Legacy `/document/download/...` aliases** — still use download
semantics; MCP clients should call `/preview` explicitly
Fixes #15398
## Test plan
- [x] `pytest test/unit_test/api/utils/test_file_response_headers.py`
(6/6)
---------
Co-authored-by: MkDev11 <mkdev11@users.noreply.github.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
Co-authored-by: Ling Qin <qinling0210@163.com>
This commit is contained in:
147
api/utils/file_response.py
Normal file
147
api/utils/file_response.py
Normal file
@@ -0,0 +1,147 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
|
||||
import re
|
||||
from urllib.parse import urlencode
|
||||
|
||||
# Lowercase file extension (no leading dot) -> MIME type used for Content-Type.
# Entry order is significant: resolve_attachment_content_type walks this mapping
# in insertion order and returns the first extension whose MIME type matches.
CONTENT_TYPE_MAP = {
    # Documents and spreadsheets
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "doc": "application/msword",
    "pdf": "application/pdf",
    "csv": "text/csv",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    # Plain text and source code (all served as text/plain)
    "txt": "text/plain",
    "py": "text/plain",
    "js": "text/plain",
    "java": "text/plain",
    "c": "text/plain",
    "cpp": "text/plain",
    "h": "text/plain",
    "php": "text/plain",
    "go": "text/plain",
    "ts": "text/plain",
    "sh": "text/plain",
    "cs": "text/plain",
    "kt": "text/plain",
    "sql": "text/plain",
    # Markup and structured text
    "md": "text/markdown",
    "markdown": "text/markdown",
    "mdx": "text/markdown",
    "htm": "text/html",
    "html": "text/html",
    "json": "application/json",
    # Images
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    "ico": "image/x-icon",
    "avif": "image/avif",
    "heic": "image/heic",
    # Presentations
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
|
||||
|
||||
# Extensions (lowercase, no leading dot) that should_force_attachment always
# treats as download-only, regardless of the declared content type.
# NOTE(review): presumably chosen because browsers can run active content in
# these formats when rendered inline — confirm before extending the list.
FORCE_ATTACHMENT_EXTENSIONS = {
    # HTML family
    "htm",
    "html",
    "shtml",
    "xht",
    "xhtml",
    # XML, web archives and vector images
    "xml",
    "mhtml",
    "svg",
}
|
||||
|
||||
# MIME types (lowercase, without parameters) that should_force_attachment
# treats as download-only even when the extension is not on the extension list.
FORCE_ATTACHMENT_CONTENT_TYPES = {
    # HTML / XHTML
    "text/html",
    "application/xhtml+xml",
    # SVG
    "image/svg+xml",
    # Generic XML
    "text/xml",
    "application/xml",
    # MHTML-style web archives
    "multipart/related",
}
|
||||
|
||||
|
||||
def should_force_attachment(ext: str | None, content_type: str | None = None) -> bool:
    """Decide whether a file must be served as a download instead of inline.

    Args:
        ext: File extension, with or without leading dot, in any case. May be
            ``None`` or empty.
        content_type: Declared MIME type, optionally carrying parameters such
            as ``; charset=utf-8``. May be ``None`` or empty.

    Returns:
        ``True`` when the normalised extension is in
        ``FORCE_ATTACHMENT_EXTENSIONS`` or the normalised MIME type is in
        ``FORCE_ATTACHMENT_CONTENT_TYPES``; ``False`` otherwise.
    """
    # Strip surrounding whitespace as well as dots: the value may come straight
    # from a query parameter, and "html " or " .svg" must not slip past the guard.
    normalized_ext = (ext or "").lower().strip(" \t\r\n\f\v.")
    if normalized_ext in FORCE_ATTACHMENT_EXTENSIONS:
        return True
    # Only the bare media type is compared; parameters after ";" are ignored.
    normalized_type = (content_type or "").lower().split(";")[0].strip()
    return normalized_type in FORCE_ATTACHMENT_CONTENT_TYPES
|
||||
|
||||
|
||||
def sanitize_content_disposition_filename(filename: str | None) -> str | None:
|
||||
if not filename:
|
||||
return None
|
||||
base = re.sub(r"[^\w.\-]", "_", str(filename).split("/")[-1].split("\\")[-1])
|
||||
return base or None
|
||||
|
||||
|
||||
def resolve_attachment_content_type(ext: str | None = None, mime_type: str | None = None) -> tuple[str | None, str | None]:
|
||||
if mime_type:
|
||||
normalized_type = mime_type.lower().split(";")[0].strip()
|
||||
for known_ext, known_type in CONTENT_TYPE_MAP.items():
|
||||
if known_type == normalized_type:
|
||||
return normalized_type, known_ext
|
||||
return normalized_type, (ext or "").lower().strip(".") or None
|
||||
if ext:
|
||||
normalized_ext = ext.lower().strip(".")
|
||||
return CONTENT_TYPE_MAP.get(normalized_ext, f"application/{normalized_ext}"), normalized_ext
|
||||
return None, None
|
||||
|
||||
|
||||
def apply_preview_file_response_headers(
    response,
    content_type: str | None,
    ext: str | None = None,
    filename: str | None = None,
):
    """Set headers on *response* so the file is previewed inline when allowed.

    Args:
        response: Object whose ``headers`` attribute provides
            ``set(name, value)``.
        content_type: Value for ``Content-Type``; the header is left alone
            when this is falsy.
        ext: File extension passed to ``should_force_attachment``.
        filename: Optional display name; it is sanitised before being placed
            in ``Content-Disposition``.

    Returns:
        The same *response* object, after its headers have been updated.
    """
    headers = response.headers
    if content_type:
        headers.set("Content-Type", content_type)

    # Types on the force-attachment lists are never shown inline: they are
    # sent as a bare attachment with content sniffing disabled.
    if should_force_attachment(ext, content_type):
        headers.set("X-Content-Type-Options", "nosniff")
        headers.set("Content-Disposition", "attachment")
        return response

    safe_filename = sanitize_content_disposition_filename(filename)
    disposition = f'inline; filename="{safe_filename}"' if safe_filename else "inline"
    headers.set("Content-Disposition", disposition)
    return response
|
||||
|
||||
|
||||
def apply_download_file_response_headers(
    response,
    content_type: str | None,
    ext: str | None = None,
    filename: str | None = None,
):
    """Set headers on *response* so the file is always delivered as a download.

    Args:
        response: Object whose ``headers`` attribute provides
            ``set(name, value)``.
        content_type: Value for ``Content-Type``; the header is left alone
            when this is falsy.
        ext: File extension passed to ``should_force_attachment``.
        filename: Optional download name; it is sanitised before being placed
            in ``Content-Disposition``.

    Returns:
        The same *response* object, after its headers have been updated.
    """
    headers = response.headers
    if content_type:
        headers.set("Content-Type", content_type)

    # Types on the force-attachment lists additionally get "nosniff" and are
    # sent without a filename parameter.
    if should_force_attachment(ext, content_type):
        headers.set("X-Content-Type-Options", "nosniff")
        headers.set("Content-Disposition", "attachment")
        return response

    safe_filename = sanitize_content_disposition_filename(filename)
    disposition = f'attachment; filename="{safe_filename}"' if safe_filename else "attachment"
    headers.set("Content-Disposition", disposition)
    return response
|
||||
|
||||
|
||||
def agent_attachment_preview_path(attachment_id: str, *, ext: str | None = None, mime_type: str | None = None) -> str:
|
||||
query: dict[str, str] = {}
|
||||
if ext:
|
||||
query["ext"] = ext
|
||||
if mime_type:
|
||||
query["mime_type"] = mime_type
|
||||
suffix = f"?{urlencode(query)}" if query else ""
|
||||
return f"/api/v1/agents/attachments/{attachment_id}/preview{suffix}"
|
||||
@@ -40,83 +40,17 @@ ATTEMPT_LOCK_SECONDS = 30 * 60 # lock for 30 minutes
|
||||
RESEND_COOLDOWN_SECONDS = 60 # cooldown for 1 minute
|
||||
|
||||
|
||||
# Lowercase file extension (no leading dot) -> MIME type used for Content-Type.
CONTENT_TYPE_MAP = {
    # Documents and spreadsheets
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "doc": "application/msword",
    "pdf": "application/pdf",
    "csv": "text/csv",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    # Plain text and source code (all served as text/plain)
    "txt": "text/plain",
    "py": "text/plain",
    "js": "text/plain",
    "java": "text/plain",
    "c": "text/plain",
    "cpp": "text/plain",
    "h": "text/plain",
    "php": "text/plain",
    "go": "text/plain",
    "ts": "text/plain",
    "sh": "text/plain",
    "cs": "text/plain",
    "kt": "text/plain",
    "sql": "text/plain",
    # Markup and structured text
    "md": "text/markdown",
    "markdown": "text/markdown",
    "mdx": "text/markdown",
    "htm": "text/html",
    "html": "text/html",
    "json": "application/json",
    # Images
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    "ico": "image/x-icon",
    "avif": "image/avif",
    "heic": "image/heic",
    # Presentations
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
|
||||
|
||||
|
||||
# Extensions (lowercase, no leading dot) that should_force_attachment always
# treats as download-only, regardless of the declared content type.
FORCE_ATTACHMENT_EXTENSIONS = {
    # HTML family
    "htm",
    "html",
    "shtml",
    "xht",
    "xhtml",
    # XML, web archives and vector images
    "xml",
    "mhtml",
    "svg",
}
|
||||
|
||||
|
||||
# MIME types (lowercase, without parameters) that should_force_attachment
# treats as download-only even when the extension is not on the extension list.
FORCE_ATTACHMENT_CONTENT_TYPES = {
    # HTML / XHTML
    "text/html",
    "application/xhtml+xml",
    # SVG
    "image/svg+xml",
    # Generic XML
    "text/xml",
    "application/xml",
    # MHTML-style web archives
    "multipart/related",
}
|
||||
|
||||
|
||||
def should_force_attachment(ext: str | None, content_type: str | None = None) -> bool:
    """Decide whether a file must be served as a download instead of inline.

    Args:
        ext: File extension, with or without leading dot, in any case. May be
            ``None`` or empty.
        content_type: Declared MIME type, optionally carrying parameters such
            as ``; charset=utf-8``. May be ``None`` or empty.

    Returns:
        ``True`` when the normalised extension is in
        ``FORCE_ATTACHMENT_EXTENSIONS`` or the normalised MIME type is in
        ``FORCE_ATTACHMENT_CONTENT_TYPES``; ``False`` otherwise.
    """
    # Strip surrounding whitespace as well as dots so that "html " or " .svg"
    # cannot slip past the guard.
    normalized_ext = (ext or "").lower().strip(" \t\r\n\f\v.")
    if normalized_ext in FORCE_ATTACHMENT_EXTENSIONS:
        return True
    # Compare only the bare media type: a value like "text/html; charset=utf-8"
    # must still match the "text/html" entry.
    normalized_type = (content_type or "").lower().split(";")[0].strip()
    return normalized_type in FORCE_ATTACHMENT_CONTENT_TYPES
|
||||
from api.utils.file_response import ( # noqa: F401
|
||||
CONTENT_TYPE_MAP,
|
||||
FORCE_ATTACHMENT_CONTENT_TYPES,
|
||||
FORCE_ATTACHMENT_EXTENSIONS,
|
||||
agent_attachment_preview_path,
|
||||
apply_download_file_response_headers,
|
||||
apply_preview_file_response_headers,
|
||||
resolve_attachment_content_type,
|
||||
sanitize_content_disposition_filename,
|
||||
should_force_attachment,
|
||||
)
|
||||
|
||||
|
||||
def apply_safe_file_response_headers(response, content_type: str | None, ext: str | None = None):
|
||||
|
||||
Reference in New Issue
Block a user