diff --git a/agent/tools/code_exec.py b/agent/tools/code_exec.py index 5d65a2e33a..229967a572 100644 --- a/agent/tools/code_exec.py +++ b/agent/tools/code_exec.py @@ -533,7 +533,7 @@ class CodeExec(ToolBase, ABC): settings.STORAGE_IMPL.put(SANDBOX_ARTIFACT_BUCKET, storage_name, binary) - url = f"/v1/document/artifact/{storage_name}" + url = f"/api/v1/documents/artifact/{storage_name}" uploaded.append( { "name": name, diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 642ff8b456..d009071505 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -38,7 +38,7 @@ from api.utils.api_utils import ( ) from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers, is_valid_url from common import settings -from common.constants import SANDBOX_ARTIFACT_BUCKET, RetCode, TaskStatus +from common.constants import RetCode, TaskStatus from common.file_utils import get_project_base_directory from common.misc_utils import thread_pool_exec from common.ssrf_guard import assert_url_is_safe @@ -325,44 +325,6 @@ async def get_image(image_id): return server_error_response(e) -ARTIFACT_CONTENT_TYPES = { - ".png": "image/png", - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".svg": "image/svg+xml", - ".pdf": "application/pdf", - ".csv": "text/csv", - ".json": "application/json", - ".html": "text/html", -} - - -@manager.route("/artifact/", methods=["GET"]) # noqa: F821 -@login_required -async def get_artifact(filename): - try: - bucket = SANDBOX_ARTIFACT_BUCKET - # Validate filename: must be uuid hex + allowed extension, nothing else - basename = os.path.basename(filename) - if basename != filename or "/" in filename or "\\" in filename: - return get_data_error_result(message="Invalid filename.") - ext = os.path.splitext(basename)[1].lower() - if ext not in ARTIFACT_CONTENT_TYPES: - return get_data_error_result(message="Invalid file type.") - data = await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, basename) - if not data: - return get_data_error_result(message="Artifact not found.") - content_type = ARTIFACT_CONTENT_TYPES.get(ext, "application/octet-stream") - response = await make_response(data) - safe_filename = re.sub(r"[^\w.\-]", "_", basename) - apply_safe_file_response_headers(response, content_type, ext) - if not response.headers.get("Content-Disposition"): - response.headers.set("Content-Disposition", f'inline; filename="{safe_filename}"') - return response - except Exception as e: - return server_error_response(e) - - @manager.route("/upload_and_parse", methods=["POST"]) # noqa: F821 @login_required @validate_request("conversation_id") diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 3055ca8707..560eea9334 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -15,10 +15,11 @@ # import logging import json +import os.path import re from pathlib import Path -from quart import request +from quart import make_response, request from peewee import OperationalError from pydantic import ValidationError @@ -41,12 +42,13 @@ from api.utils.validation_utils import ( UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq, ) from common import settings -from common.constants import ParserType, RetCode, TaskStatus +from common.constants import ParserType, RetCode, SANDBOX_ARTIFACT_BUCKET, TaskStatus from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema from common.misc_utils import get_uuid, thread_pool_exec from api.utils.file_utils import filename_type, thumbnail from api.utils.web_utils import html2pdf, is_valid_url from rag.nlp import search +from api.utils.web_utils import apply_safe_file_response_headers @manager.route("/datasets//documents/", methods=["PATCH"]) # noqa: F821 @login_required @@ -1441,3 +1443,65 @@ async def stop_parse_documents(tenant_id, dataset_id): except Exception as e: logging.exception(e) return get_error_data_result(message="Internal server error") + + +ARTIFACT_CONTENT_TYPES = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".svg": "image/svg+xml", + ".pdf": "application/pdf", + ".csv": "text/csv", + ".json": "application/json", + ".html": "text/html", +} + + +@manager.route("/documents/artifact/", methods=["GET"]) # noqa: F821 +@login_required +async def get_artifact(filename): + """ + Get an artifact file. + --- + tags: + - Documents + security: + - ApiKeyAuth: [] + parameters: + - in: path + name: filename + type: string + required: true + description: Name of the artifact file. + - in: header + name: Authorization + type: string + required: true + description: Bearer token for authentication. + responses: + 200: + description: Artifact file returned successfully. + """ + from common import settings + + try: + bucket = SANDBOX_ARTIFACT_BUCKET + # Validate filename: must be uuid hex + allowed extension, nothing else + basename = os.path.basename(filename) + if basename != filename or "/" in filename or "\\" in filename: + return get_data_error_result(message="Invalid filename.") + ext = os.path.splitext(basename)[1].lower() + if ext not in ARTIFACT_CONTENT_TYPES: + return get_data_error_result(message="Invalid file type.") + data = await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, basename) + if not data: + return get_data_error_result(message="Artifact not found.") + content_type = ARTIFACT_CONTENT_TYPES.get(ext, "application/octet-stream") + response = await make_response(data) + safe_filename = re.sub(r"[^\w.\-]", "_", basename) + apply_safe_file_response_headers(response, content_type, ext) + if not response.headers.get("Content-Disposition"): + response.headers.set("Content-Disposition", f'inline; filename="{safe_filename}"') + return response + except Exception as e: + return server_error_response(e) diff --git a/web/src/components/next-markdown-content/index.tsx b/web/src/components/next-markdown-content/index.tsx index c13cb6159f..8fc966897d 100644 --- a/web/src/components/next-markdown-content/index.tsx +++ b/web/src/components/next-markdown-content/index.tsx @@ -46,7 +46,7 @@ import styles from './index.module.less'; const getChunkIndex = (match: string) => parseCitationIndex(match); const isArtifactUrl = (url?: string) => - Boolean(url && url.includes('/document/artifact/')); + Boolean(url && url.includes('/api/v1/documents/artifact/')); const fetchArtifactBlob = async (url: string): Promise => { const response = await request(url, {