Refactor: migrate artifact API (#14348)

### What problem does this PR solve?

Before migration: GET /v1/document/artifact/<filename>
After migration:  GET /api/v1/documents/artifact/<filename>

### Type of change

- [x] Refactoring
This commit is contained in:
Jack
2026-04-27 15:19:41 +08:00
committed by GitHub
parent 2846a93998
commit 290f0294d6
4 changed files with 69 additions and 43 deletions

View File

@@ -533,7 +533,7 @@ class CodeExec(ToolBase, ABC):
settings.STORAGE_IMPL.put(SANDBOX_ARTIFACT_BUCKET, storage_name, binary)
url = f"/v1/document/artifact/{storage_name}"
url = f"/api/v1/documents/artifact/{storage_name}"
uploaded.append(
{
"name": name,

View File

@@ -38,7 +38,7 @@ from api.utils.api_utils import (
)
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers, is_valid_url
from common import settings
from common.constants import SANDBOX_ARTIFACT_BUCKET, RetCode, TaskStatus
from common.constants import RetCode, TaskStatus
from common.file_utils import get_project_base_directory
from common.misc_utils import thread_pool_exec
from common.ssrf_guard import assert_url_is_safe
@@ -325,44 +325,6 @@ async def get_image(image_id):
return server_error_response(e)
# Whitelist mapping a lower-cased artifact file extension (with the leading
# dot) to the MIME type used when that artifact is served. get_artifact
# rejects any extension that is not a key of this mapping.
ARTIFACT_CONTENT_TYPES = dict(
    (
        (".png", "image/png"),
        (".jpg", "image/jpeg"),
        (".jpeg", "image/jpeg"),
        (".svg", "image/svg+xml"),
        (".pdf", "application/pdf"),
        (".csv", "text/csv"),
        (".json", "application/json"),
        (".html", "text/html"),
    )
)
@manager.route("/artifact/<filename>", methods=["GET"])  # noqa: F821
@login_required
async def get_artifact(filename):
    """Serve a stored sandbox artifact identified by its bare file name.

    The name must contain no path separators and its extension must be a key
    of ARTIFACT_CONTENT_TYPES. The bytes are read from SANDBOX_ARTIFACT_BUCKET
    and returned with the mapped content type. A rejected name or a missing
    artifact yields a data error result; any raised exception is converted
    with server_error_response.
    """
    try:
        # Accept only a bare file name: no separators and no directory part.
        has_separator = "/" in filename or "\\" in filename
        if has_separator or os.path.basename(filename) != filename:
            return get_data_error_result(message="Invalid filename.")

        # Only whitelisted extensions are served; the lookup doubles as the check.
        ext = os.path.splitext(filename)[1].lower()
        content_type = ARTIFACT_CONTENT_TYPES.get(ext)
        if content_type is None:
            return get_data_error_result(message="Invalid file type.")

        # The storage call is handed to thread_pool_exec and awaited.
        payload = await thread_pool_exec(settings.STORAGE_IMPL.get, SANDBOX_ARTIFACT_BUCKET, filename)
        if not payload:
            return get_data_error_result(message="Artifact not found.")

        resp = await make_response(payload)
        apply_safe_file_response_headers(resp, content_type, ext)
        # Fall back to inline display when no Content-Disposition is set yet.
        if not resp.headers.get("Content-Disposition"):
            safe_filename = re.sub(r"[^\w.\-]", "_", filename)
            resp.headers.set("Content-Disposition", f'inline; filename="{safe_filename}"')
        return resp
    except Exception as e:
        return server_error_response(e)
@manager.route("/upload_and_parse", methods=["POST"]) # noqa: F821
@login_required
@validate_request("conversation_id")

View File

@@ -15,10 +15,11 @@
#
import logging
import json
import os.path
import re
from pathlib import Path
from quart import request
from quart import make_response, request
from peewee import OperationalError
from pydantic import ValidationError
@@ -41,12 +42,13 @@ from api.utils.validation_utils import (
UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq,
)
from common import settings
from common.constants import ParserType, RetCode, TaskStatus
from common.constants import ParserType, RetCode, SANDBOX_ARTIFACT_BUCKET, TaskStatus
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
from common.misc_utils import get_uuid, thread_pool_exec
from api.utils.file_utils import filename_type, thumbnail
from api.utils.web_utils import html2pdf, is_valid_url
from rag.nlp import search
from api.utils.web_utils import apply_safe_file_response_headers
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PATCH"]) # noqa: F821
@login_required
@@ -1441,3 +1443,65 @@ async def stop_parse_documents(tenant_id, dataset_id):
except Exception as e:
logging.exception(e)
return get_error_data_result(message="Internal server error")
# Whitelist mapping a lower-cased artifact file extension (with the leading
# dot) to the MIME type used when that artifact is served. get_artifact
# rejects any extension that is not a key of this mapping.
ARTIFACT_CONTENT_TYPES = dict(
    (
        (".png", "image/png"),
        (".jpg", "image/jpeg"),
        (".jpeg", "image/jpeg"),
        (".svg", "image/svg+xml"),
        (".pdf", "application/pdf"),
        (".csv", "text/csv"),
        (".json", "application/json"),
        (".html", "text/html"),
    )
)
@manager.route("/documents/artifact/<filename>", methods=["GET"])  # noqa: F821
@login_required
async def get_artifact(filename):
    """
    Get an artifact file.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: filename
        type: string
        required: true
        description: Name of the artifact file.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Artifact file returned successfully.
    """
    from common import settings

    try:
        # Accept only a bare file name: no separators and no directory part.
        has_separator = "/" in filename or "\\" in filename
        if has_separator or os.path.basename(filename) != filename:
            return get_data_error_result(message="Invalid filename.")

        # Only whitelisted extensions are served; the lookup doubles as the check.
        ext = os.path.splitext(filename)[1].lower()
        content_type = ARTIFACT_CONTENT_TYPES.get(ext)
        if content_type is None:
            return get_data_error_result(message="Invalid file type.")

        # The storage call is handed to thread_pool_exec and awaited.
        payload = await thread_pool_exec(settings.STORAGE_IMPL.get, SANDBOX_ARTIFACT_BUCKET, filename)
        if not payload:
            return get_data_error_result(message="Artifact not found.")

        resp = await make_response(payload)
        apply_safe_file_response_headers(resp, content_type, ext)
        # Fall back to inline display when no Content-Disposition is set yet.
        if not resp.headers.get("Content-Disposition"):
            safe_filename = re.sub(r"[^\w.\-]", "_", filename)
            resp.headers.set("Content-Disposition", f'inline; filename="{safe_filename}"')
        return resp
    except Exception as e:
        return server_error_response(e)

View File

@@ -46,7 +46,7 @@ import styles from './index.module.less';
const getChunkIndex = (match: string) => parseCitationIndex(match);
const isArtifactUrl = (url?: string) =>
Boolean(url && url.includes('/document/artifact/'));
Boolean(url && url.includes('/api/v1/documents/artifact/'));
const fetchArtifactBlob = async (url: string): Promise<Blob> => {
const response = await request(url, {