mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 18:45:38 +08:00
Fix: stored XSS via HTML File upload and inline Rendering in file get (#13202)
### What problem does this PR solve?

Fix stored XSS via HTML file upload and inline rendering in `/v1/file/get/<id>`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -44,7 +44,7 @@ from api.utils.api_utils import (
|
||||
from api.utils.file_utils import filename_type, thumbnail
|
||||
from common.file_utils import get_project_base_directory
|
||||
from common.constants import RetCode, VALID_TASK_STATUS, ParserType, TaskStatus
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers, html2pdf, is_valid_url
|
||||
from deepdoc.parser.html_parser import RAGFlowHtmlParser
|
||||
from rag.nlp import search, rag_tokenizer
|
||||
from common import settings
|
||||
@@ -718,13 +718,11 @@ async def get(doc_id):
|
||||
|
||||
ext = re.search(r"\.([^.]+)$", doc.name.lower())
|
||||
ext = ext.group(1) if ext else None
|
||||
content_type = None
|
||||
if ext:
|
||||
if doc.type == FileType.VISUAL.value:
|
||||
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
|
||||
else:
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||
response.headers.set("Content-Type", content_type)
|
||||
fallback_prefix = "image" if doc.type == FileType.VISUAL.value else "application"
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
|
||||
apply_safe_file_response_headers(response, content_type, ext)
|
||||
return response
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
@@ -737,7 +735,8 @@ async def download_attachment(attachment_id):
|
||||
ext = request.args.get("ext", "markdown")
|
||||
data = await thread_pool_exec(settings.STORAGE_IMPL.get, current_user.id, attachment_id)
|
||||
response = await make_response(data)
|
||||
response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||
apply_safe_file_response_headers(response, content_type, ext)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ from api.db.services import duplicate_name
|
||||
from api.db.services.file_service import FileService
|
||||
from api.utils.api_utils import get_json_result, get_request_json
|
||||
from api.utils.file_utils import filename_type
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
|
||||
from common import settings
|
||||
|
||||
@manager.route('/upload', methods=['POST']) # noqa: F821
|
||||
@@ -364,12 +364,11 @@ async def get(file_id):
|
||||
response = await make_response(blob)
|
||||
ext = re.search(r"\.([^.]+)$", file.name.lower())
|
||||
ext = ext.group(1) if ext else None
|
||||
content_type = None
|
||||
if ext:
|
||||
if file.type == FileType.VISUAL.value:
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
|
||||
else:
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||
response.headers.set("Content-Type", content_type)
|
||||
fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
|
||||
apply_safe_file_response_headers(response, content_type, ext)
|
||||
return response
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@@ -28,7 +28,7 @@ from api.db import FileType
|
||||
from api.db.services import duplicate_name
|
||||
from api.db.services.file_service import FileService
|
||||
from api.utils.file_utils import filename_type
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
|
||||
from common import settings
|
||||
from common.constants import RetCode
|
||||
|
||||
@@ -623,11 +623,12 @@ async def get(tenant_id, file_id):
|
||||
|
||||
response = await make_response(blob)
|
||||
ext = re.search(r"\.([^.]+)$", file.name)
|
||||
if ext:
|
||||
if file.type == FileType.VISUAL.value:
|
||||
response.headers.set('Content-Type', 'image/%s' % ext.group(1))
|
||||
else:
|
||||
response.headers.set('Content-Type', 'application/%s' % ext.group(1))
|
||||
extension = ext.group(1).lower() if ext else None
|
||||
content_type = None
|
||||
if extension:
|
||||
fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
|
||||
content_type = CONTENT_TYPE_MAP.get(extension, f"{fallback_prefix}/{extension}")
|
||||
apply_safe_file_response_headers(response, content_type, extension)
|
||||
return response
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
@@ -640,7 +641,8 @@ async def download_attachment(tenant_id, attachment_id):
|
||||
ext = request.args.get("ext", "markdown")
|
||||
data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
|
||||
response = await make_response(data)
|
||||
response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
|
||||
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||
apply_safe_file_response_headers(response, content_type, ext)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
@@ -92,6 +92,46 @@ CONTENT_TYPE_MAP = {
|
||||
}
|
||||
|
||||
|
||||
FORCE_ATTACHMENT_EXTENSIONS = {
|
||||
"htm",
|
||||
"html",
|
||||
"shtml",
|
||||
"xht",
|
||||
"xhtml",
|
||||
"xml",
|
||||
"mhtml",
|
||||
"svg",
|
||||
}
|
||||
|
||||
|
||||
FORCE_ATTACHMENT_CONTENT_TYPES = {
|
||||
"text/html",
|
||||
"image/svg+xml",
|
||||
"application/xhtml+xml",
|
||||
"text/xml",
|
||||
"application/xml",
|
||||
"multipart/related",
|
||||
}
|
||||
|
||||
|
||||
def should_force_attachment(ext: str | None, content_type: str | None = None) -> bool:
|
||||
normalized_ext = (ext or "").lower().strip(".")
|
||||
if normalized_ext in FORCE_ATTACHMENT_EXTENSIONS:
|
||||
return True
|
||||
normalized_type = (content_type or "").lower()
|
||||
return normalized_type in FORCE_ATTACHMENT_CONTENT_TYPES
|
||||
|
||||
|
||||
def apply_safe_file_response_headers(response, content_type: str | None, ext: str | None = None):
    """Set download-safety headers on a file response and return it.

    Args:
        response: Response object whose ``headers`` exposes ``set(name, value)``.
        content_type: Content-Type to send. When empty or ``None``,
            ``application/octet-stream`` is sent instead.
        ext: File extension used, together with ``content_type``, to decide
            whether the file must be downloaded rather than rendered inline.

    Returns:
        The same ``response`` object, with its headers updated.
    """
    # Always write an explicit type. NOTE(review): this assumes the framework's
    # default type for a raw body may be a renderable one (e.g. text/html),
    # which would let an extensionless upload render inline - confirm.
    response.headers.set("Content-Type", content_type or "application/octet-stream")

    # Sent on every file response so the browser honours the declared type
    # instead of sniffing the user-supplied bytes.
    response.headers.set("X-Content-Type-Options", "nosniff")

    if should_force_attachment(ext, content_type):
        response.headers.set("Content-Disposition", "attachment")
    return response
|
||||
|
||||
|
||||
def html2pdf(
|
||||
source: str,
|
||||
timeout: int = 2,
|
||||
@@ -188,10 +228,9 @@ def get_float(req: dict, key: str, default: float | int = 10.0) -> float:
|
||||
return parsed if parsed > 0 else default
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
|
||||
async def send_email_html(to_email: str, subject: str, template_key: str, **context):
|
||||
|
||||
body = await render_template_string(EMAIL_TEMPLATES.get(template_key), **context)
|
||||
msg = MIMEText(body, "plain", "utf-8")
|
||||
msg["Subject"] = Header(subject, "utf-8")
|
||||
@@ -236,10 +275,10 @@ def otp_keys(email: str):
|
||||
|
||||
def hash_code(code: str, salt: bytes) -> str:
    """Return the hex-encoded HMAC-SHA256 of ``code`` keyed with ``salt``.

    Args:
        code: Text to hash; ``None`` or an empty string is hashed as ``""``.
        salt: Key bytes for the HMAC.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    import hashlib
    import hmac

    message = (code or "").encode("utf-8")
    return hmac.new(salt, message, hashlib.sha256).hexdigest()
|
||||
|
||||
|
||||
|
||||
def captcha_key(email: str) -> str:
    """Build the lookup key for the captcha associated with ``email``."""
    return "captcha:{}".format(email)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user