diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index cc2b7c8c4a..b45c3c0a7e 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -44,7 +44,7 @@ from api.utils.api_utils import (
 from api.utils.file_utils import filename_type, thumbnail
 from common.file_utils import get_project_base_directory
 from common.constants import RetCode, VALID_TASK_STATUS, ParserType, TaskStatus
-from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
+from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers, html2pdf, is_valid_url
 from deepdoc.parser.html_parser import RAGFlowHtmlParser
 from rag.nlp import search, rag_tokenizer
 from common import settings
@@ -718,13 +718,11 @@ async def get(doc_id):
 
         ext = re.search(r"\.([^.]+)$", doc.name.lower())
         ext = ext.group(1) if ext else None
+        content_type = None
         if ext:
-            if doc.type == FileType.VISUAL.value:
-
-                content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
-            else:
-                content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
-            response.headers.set("Content-Type", content_type)
+            fallback_prefix = "image" if doc.type == FileType.VISUAL.value else "application"
+            content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
+        apply_safe_file_response_headers(response, content_type, ext)
         return response
     except Exception as e:
         return server_error_response(e)
@@ -737,7 +735,8 @@ async def download_attachment(attachment_id):
         ext = request.args.get("ext", "markdown")
         data = await thread_pool_exec(settings.STORAGE_IMPL.get, current_user.id, attachment_id)
         response = await make_response(data)
-        response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
+        content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
+        apply_safe_file_response_headers(response, content_type, ext)
 
         return response
 
diff --git a/api/apps/file_app.py b/api/apps/file_app.py
index 50cbd185af..1733d9f808 100644
--- a/api/apps/file_app.py
+++ b/api/apps/file_app.py
@@ -31,7 +31,7 @@ from api.db.services import duplicate_name
 from api.db.services.file_service import FileService
 from api.utils.api_utils import get_json_result, get_request_json
 from api.utils.file_utils import filename_type
-from api.utils.web_utils import CONTENT_TYPE_MAP
+from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
 from common import settings
 
 @manager.route('/upload', methods=['POST'])  # noqa: F821
@@ -364,12 +364,11 @@ async def get(file_id):
         response = await make_response(blob)
         ext = re.search(r"\.([^.]+)$", file.name.lower())
         ext = ext.group(1) if ext else None
+        content_type = None
         if ext:
-            if file.type == FileType.VISUAL.value:
-                content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
-            else:
-                content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
-            response.headers.set("Content-Type", content_type)
+            fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
+            content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
+        apply_safe_file_response_headers(response, content_type, ext)
         return response
     except Exception as e:
         return server_error_response(e)
diff --git a/api/apps/sdk/files.py b/api/apps/sdk/files.py
index 759dfae80d..14ed3bf706 100644
--- a/api/apps/sdk/files.py
+++ b/api/apps/sdk/files.py
@@ -28,7 +28,7 @@ from api.db import FileType
 from api.db.services import duplicate_name
 from api.db.services.file_service import FileService
 from api.utils.file_utils import filename_type
-from api.utils.web_utils import CONTENT_TYPE_MAP
+from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
 from common import settings
 from common.constants import RetCode
 
@@ -623,11 +623,12 @@ async def get(tenant_id, file_id):
 
         response = await make_response(blob)
         ext = re.search(r"\.([^.]+)$", file.name)
-        if ext:
-            if file.type == FileType.VISUAL.value:
-                response.headers.set('Content-Type', 'image/%s' % ext.group(1))
-            else:
-                response.headers.set('Content-Type', 'application/%s' % ext.group(1))
+        extension = ext.group(1).lower() if ext else None
+        content_type = None
+        if extension:
+            fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
+            content_type = CONTENT_TYPE_MAP.get(extension, f"{fallback_prefix}/{extension}")
+        apply_safe_file_response_headers(response, content_type, extension)
         return response
     except Exception as e:
         return server_error_response(e)
@@ -640,7 +641,8 @@ async def download_attachment(tenant_id, attachment_id):
         ext = request.args.get("ext", "markdown")
         data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
         response = await make_response(data)
-        response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
+        content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
+        apply_safe_file_response_headers(response, content_type, ext)
 
         return response
 
diff --git a/api/utils/web_utils.py b/api/utils/web_utils.py
index 2d26229311..4cb13ff7e6 100644
--- a/api/utils/web_utils.py
+++ b/api/utils/web_utils.py
@@ -92,6 +92,73 @@ CONTENT_TYPE_MAP = {
 }
 
 
+# Extensions that are always served as attachments (HTML/XML/SVG-family documents).
+FORCE_ATTACHMENT_EXTENSIONS = {
+    "htm",
+    "html",
+    "shtml",
+    "xht",
+    "xhtml",
+    "xml",
+    "mhtml",
+    "svg",
+}
+
+
+# Media types that are always served as attachments, whatever the extension is.
+FORCE_ATTACHMENT_CONTENT_TYPES = {
+    "text/html",
+    "image/svg+xml",
+    "application/xhtml+xml",
+    "text/xml",
+    "application/xml",
+    "multipart/related",
+}
+
+
+def should_force_attachment(ext: str | None, content_type: str | None = None) -> bool:
+    """Return True when a file must be downloaded instead of rendered inline.
+
+    Args:
+        ext: File extension, with or without a leading dot; may be None.
+        content_type: Content-Type header value, optionally with parameters
+            such as "; charset=utf-8"; may be None.
+
+    Returns:
+        True if the extension is in FORCE_ATTACHMENT_EXTENSIONS or the media
+        type is in FORCE_ATTACHMENT_CONTENT_TYPES, otherwise False.
+    """
+    normalized_ext = (ext or "").strip().lower().strip(".")
+    if normalized_ext in FORCE_ATTACHMENT_EXTENSIONS:
+        return True
+    # Compare the bare media type only: parameters and stray whitespace must
+    # not let e.g. "text/html; charset=utf-8" slip past the exact-match check.
+    media_type = (content_type or "").split(";", 1)[0].strip().lower()
+    return media_type in FORCE_ATTACHMENT_CONTENT_TYPES
+
+
+def apply_safe_file_response_headers(response, content_type: str | None, ext: str | None = None):
+    """Set Content-Type on a file response and force download of listed types.
+
+    Args:
+        response: Response object whose ``headers`` supports ``set`` and ``get``.
+        content_type: Value for the Content-Type header; left untouched when falsy.
+        ext: File extension used for the attachment decision; may be None.
+
+    Returns:
+        The same response object that was passed in.
+    """
+    if content_type:
+        response.headers.set("Content-Type", content_type)
+    # With no explicit type the header already on the response is what the
+    # client receives, so base the decision on that value instead of None.
+    effective_type = content_type or response.headers.get("Content-Type")
+    if should_force_attachment(ext, effective_type):
+        response.headers.set("X-Content-Type-Options", "nosniff")
+        response.headers.set("Content-Disposition", "attachment")
+    return response
+
+
 def html2pdf(
     source: str,
     timeout: int = 2,
@@ -188,10 +255,9 @@ def get_float(req: dict, key: str, default: float | int = 10.0) -> float:
         return parsed if parsed > 0 else default
     except (TypeError, ValueError):
         return default
-    
+
 
 async def send_email_html(to_email: str, subject: str, template_key: str, **context):
-
     body = await render_template_string(EMAIL_TEMPLATES.get(template_key), **context)
     msg = MIMEText(body, "plain", "utf-8")
     msg["Subject"] = Header(subject, "utf-8")
@@ -236,10 +302,10 @@ def otp_keys(email: str):
 
 def hash_code(code: str, salt: bytes) -> str:
     import hashlib
-    import hmac 
+    import hmac
+
     return hmac.new(salt, (code or "").encode("utf-8"), hashlib.sha256).hexdigest()
-    
+
 
 def captcha_key(email: str) -> str:
     return f"captcha:{email}"
-