Fix: stored XSS via HTML file upload and inline rendering in file get (#13202)

### What problem does this PR solve?

Fix stored XSS via HTML file upload and inline rendering in
/v1/file/get/<id>

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Yongteng Lei
2026-02-25 09:46:48 +08:00
committed by GitHub
parent 5a8fa7cf31
commit c292d617ca
4 changed files with 65 additions and 26 deletions

View File

@@ -44,7 +44,7 @@ from api.utils.api_utils import (
from api.utils.file_utils import filename_type, thumbnail
from common.file_utils import get_project_base_directory
from common.constants import RetCode, VALID_TASK_STATUS, ParserType, TaskStatus
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers, html2pdf, is_valid_url
from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search, rag_tokenizer
from common import settings
@@ -718,13 +718,11 @@ async def get(doc_id):
ext = re.search(r"\.([^.]+)$", doc.name.lower())
ext = ext.group(1) if ext else None
content_type = None
if ext:
if doc.type == FileType.VISUAL.value:
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
else:
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
response.headers.set("Content-Type", content_type)
fallback_prefix = "image" if doc.type == FileType.VISUAL.value else "application"
content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
apply_safe_file_response_headers(response, content_type, ext)
return response
except Exception as e:
return server_error_response(e)
@@ -737,7 +735,8 @@ async def download_attachment(attachment_id):
ext = request.args.get("ext", "markdown")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, current_user.id, attachment_id)
response = await make_response(data)
response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
apply_safe_file_response_headers(response, content_type, ext)
return response

View File

@@ -31,7 +31,7 @@ from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api.utils.api_utils import get_json_result, get_request_json
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
from common import settings
@manager.route('/upload', methods=['POST']) # noqa: F821
@@ -364,12 +364,11 @@ async def get(file_id):
response = await make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name.lower())
ext = ext.group(1) if ext else None
content_type = None
if ext:
if file.type == FileType.VISUAL.value:
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
else:
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
response.headers.set("Content-Type", content_type)
fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
apply_safe_file_response_headers(response, content_type, ext)
return response
except Exception as e:
return server_error_response(e)

View File

@@ -28,7 +28,7 @@ from api.db import FileType
from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
from common import settings
from common.constants import RetCode
@@ -623,11 +623,12 @@ async def get(tenant_id, file_id):
response = await make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name)
if ext:
if file.type == FileType.VISUAL.value:
response.headers.set('Content-Type', 'image/%s' % ext.group(1))
else:
response.headers.set('Content-Type', 'application/%s' % ext.group(1))
extension = ext.group(1).lower() if ext else None
content_type = None
if extension:
fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
content_type = CONTENT_TYPE_MAP.get(extension, f"{fallback_prefix}/{extension}")
apply_safe_file_response_headers(response, content_type, extension)
return response
except Exception as e:
return server_error_response(e)
@@ -640,7 +641,8 @@ async def download_attachment(tenant_id, attachment_id):
ext = request.args.get("ext", "markdown")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
response = await make_response(data)
response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
apply_safe_file_response_headers(response, content_type, ext)
return response

View File

@@ -92,6 +92,46 @@ CONTENT_TYPE_MAP = {
}
FORCE_ATTACHMENT_EXTENSIONS = {
"htm",
"html",
"shtml",
"xht",
"xhtml",
"xml",
"mhtml",
"svg",
}
FORCE_ATTACHMENT_CONTENT_TYPES = {
"text/html",
"image/svg+xml",
"application/xhtml+xml",
"text/xml",
"application/xml",
"multipart/related",
}
def should_force_attachment(ext: str | None, content_type: str | None = None) -> bool:
normalized_ext = (ext or "").lower().strip(".")
if normalized_ext in FORCE_ATTACHMENT_EXTENSIONS:
return True
normalized_type = (content_type or "").lower()
return normalized_type in FORCE_ATTACHMENT_CONTENT_TYPES
def apply_safe_file_response_headers(response, content_type: str | None, ext: str | None = None):
    """Set Content-Type and download-safety headers on a file response.

    Args:
        response: Response object exposing ``headers.set(name, value)``.
        content_type: Media type to advertise. When falsy, the generic
            ``application/octet-stream`` is used and the file is forced to
            download.
        ext: File extension used, together with ``content_type``, to decide
            whether the file must be downloaded rather than rendered inline.

    Returns:
        The same ``response`` object, for call chaining.
    """
    # Always write an explicit type. Skipping the header leaves the framework
    # default in effect (presumably text/html for Flask/Quart responses --
    # confirm), which would render an extension-less upload as a page.
    response.headers.set("Content-Type", content_type or "application/octet-stream")

    # Tell the browser to trust the declared type instead of sniffing the body.
    response.headers.set("X-Content-Type-Options", "nosniff")

    if not content_type or should_force_attachment(ext, content_type):
        response.headers.set("Content-Disposition", "attachment")
    return response
def html2pdf(
source: str,
timeout: int = 2,
@@ -188,10 +228,9 @@ def get_float(req: dict, key: str, default: float | int = 10.0) -> float:
return parsed if parsed > 0 else default
except (TypeError, ValueError):
return default
async def send_email_html(to_email: str, subject: str, template_key: str, **context):
body = await render_template_string(EMAIL_TEMPLATES.get(template_key), **context)
msg = MIMEText(body, "plain", "utf-8")
msg["Subject"] = Header(subject, "utf-8")
@@ -236,10 +275,10 @@ def otp_keys(email: str):
def hash_code(code: str, salt: bytes) -> str:
    """Return the hex-encoded HMAC-SHA256 of *code*, keyed with *salt*.

    Args:
        code: Text to hash; a falsy value (``None`` or ``""``) is hashed as
            the empty string.
        salt: Key bytes for the HMAC.

    Returns:
        The 64-character lowercase hexadecimal digest.
    """
    import hashlib
    import hmac

    message = (code or "").encode("utf-8")
    return hmac.new(salt, message, hashlib.sha256).hexdigest()
def captcha_key(email: str) -> str:
    """Build the key under which the captcha for *email* is stored."""
    key = f"captcha:{email}"
    return key