mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Refactor: migrate artifact API (#14348)
### What problem does this PR solve?

Before migration: `GET /v1/document/artifact/<filename>`
After migration: `GET /api/v1/documents/artifact/<filename>`

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -533,7 +533,7 @@ class CodeExec(ToolBase, ABC):
|
||||
|
||||
settings.STORAGE_IMPL.put(SANDBOX_ARTIFACT_BUCKET, storage_name, binary)
|
||||
|
||||
url = f"/v1/document/artifact/{storage_name}"
|
||||
url = f"/api/v1/documents/artifact/{storage_name}"
|
||||
uploaded.append(
|
||||
{
|
||||
"name": name,
|
||||
|
||||
@@ -38,7 +38,7 @@ from api.utils.api_utils import (
|
||||
)
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers, is_valid_url
|
||||
from common import settings
|
||||
from common.constants import SANDBOX_ARTIFACT_BUCKET, RetCode, TaskStatus
|
||||
from common.constants import RetCode, TaskStatus
|
||||
from common.file_utils import get_project_base_directory
|
||||
from common.misc_utils import thread_pool_exec
|
||||
from common.ssrf_guard import assert_url_is_safe
|
||||
@@ -325,44 +325,6 @@ async def get_image(image_id):
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
# Allow-list of artifact file extensions (lower-case, with the leading dot)
# mapped to the Content-Type each one is served with. get_artifact rejects
# any extension that is not a key of this mapping.
ARTIFACT_CONTENT_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".svg": "image/svg+xml",
    ".pdf": "application/pdf",
    ".csv": "text/csv",
    ".json": "application/json",
    ".html": "text/html",
}
|
||||
|
||||
|
||||
@manager.route("/artifact/<filename>", methods=["GET"])  # noqa: F821
@login_required
async def get_artifact(filename):
    """Serve a stored sandbox artifact by file name.

    The file name must be a bare name (no path separators) whose extension is
    a key of ARTIFACT_CONTENT_TYPES. The object is read from
    SANDBOX_ARTIFACT_BUCKET and returned with the mapped Content-Type.

    Args:
        filename: Name of the artifact, taken from the URL path.

    Returns:
        The file response on success; otherwise the result of
        get_data_error_result (invalid name, disallowed extension, missing
        object) or server_error_response (any raised exception).
    """
    try:
        # Accept only a bare file name: reject anything containing a POSIX or
        # Windows path separator before it is used as a storage key.
        basename = os.path.basename(filename)
        if basename != filename or "/" in filename or "\\" in filename:
            return get_data_error_result(message="Invalid filename.")

        # The extension doubles as the allow-list check and the Content-Type key.
        ext = os.path.splitext(basename)[1].lower()
        if ext not in ARTIFACT_CONTENT_TYPES:
            return get_data_error_result(message="Invalid file type.")

        # The storage read is dispatched through thread_pool_exec and awaited.
        data = await thread_pool_exec(settings.STORAGE_IMPL.get, SANDBOX_ARTIFACT_BUCKET, basename)
        if not data:
            return get_data_error_result(message="Artifact not found.")

        # ext was validated above, so a direct lookup cannot miss.
        content_type = ARTIFACT_CONTENT_TYPES[ext]
        response = await make_response(data)
        apply_safe_file_response_headers(response, content_type, ext)

        # Only supply a disposition when the helper above did not set one.
        if not response.headers.get("Content-Disposition"):
            # re.ASCII keeps \w to [A-Za-z0-9_], so no non-ASCII character can
            # end up inside the quoted header value.
            safe_filename = re.sub(r"[^\w.\-]", "_", basename, flags=re.ASCII)
            response.headers.set("Content-Disposition", f'inline; filename="{safe_filename}"')
        return response
    except Exception as e:
        return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/upload_and_parse", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("conversation_id")
|
||||
|
||||
@@ -15,10 +15,11 @@
|
||||
#
|
||||
import logging
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from quart import request
|
||||
from quart import make_response, request
|
||||
from peewee import OperationalError
|
||||
from pydantic import ValidationError
|
||||
|
||||
@@ -41,12 +42,13 @@ from api.utils.validation_utils import (
|
||||
UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq,
|
||||
)
|
||||
from common import settings
|
||||
from common.constants import ParserType, RetCode, TaskStatus
|
||||
from common.constants import ParserType, RetCode, SANDBOX_ARTIFACT_BUCKET, TaskStatus
|
||||
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
|
||||
from common.misc_utils import get_uuid, thread_pool_exec
|
||||
from api.utils.file_utils import filename_type, thumbnail
|
||||
from api.utils.web_utils import html2pdf, is_valid_url
|
||||
from rag.nlp import search
|
||||
from api.utils.web_utils import apply_safe_file_response_headers
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PATCH"]) # noqa: F821
|
||||
@login_required
|
||||
@@ -1441,3 +1443,65 @@ async def stop_parse_documents(tenant_id, dataset_id):
|
||||
except Exception as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Internal server error")
|
||||
|
||||
|
||||
# Allow-list of artifact file extensions (lower-case, with the leading dot)
# mapped to the Content-Type each one is served with. get_artifact rejects
# any extension that is not a key of this mapping.
ARTIFACT_CONTENT_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".svg": "image/svg+xml",
    ".pdf": "application/pdf",
    ".csv": "text/csv",
    ".json": "application/json",
    ".html": "text/html",
}
|
||||
|
||||
|
||||
@manager.route("/documents/artifact/<filename>", methods=["GET"])  # noqa: F821
@login_required
async def get_artifact(filename):
    """
    Get an artifact file.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: filename
        type: string
        required: true
        description: Name of the artifact file.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Artifact file returned successfully.
    """
    # NOTE(review): the neighbouring handler in this module reports errors via
    # get_error_data_result, while this one uses get_data_error_result and
    # server_error_response - confirm both names are imported here.
    try:
        # Accept only a bare file name: reject anything containing a POSIX or
        # Windows path separator before it is used as a storage key.
        basename = os.path.basename(filename)
        if basename != filename or "/" in filename or "\\" in filename:
            return get_data_error_result(message="Invalid filename.")

        # The extension doubles as the allow-list check and the Content-Type key.
        ext = os.path.splitext(basename)[1].lower()
        if ext not in ARTIFACT_CONTENT_TYPES:
            return get_data_error_result(message="Invalid file type.")

        # The storage read is dispatched through thread_pool_exec and awaited.
        # `settings` is the module-level `from common import settings`.
        data = await thread_pool_exec(settings.STORAGE_IMPL.get, SANDBOX_ARTIFACT_BUCKET, basename)
        if not data:
            return get_data_error_result(message="Artifact not found.")

        # ext was validated above, so a direct lookup cannot miss.
        content_type = ARTIFACT_CONTENT_TYPES[ext]
        response = await make_response(data)
        apply_safe_file_response_headers(response, content_type, ext)

        # Only supply a disposition when the helper above did not set one.
        if not response.headers.get("Content-Disposition"):
            # re.ASCII keeps \w to [A-Za-z0-9_], so no non-ASCII character can
            # end up inside the quoted header value.
            safe_filename = re.sub(r"[^\w.\-]", "_", basename, flags=re.ASCII)
            response.headers.set("Content-Disposition", f'inline; filename="{safe_filename}"')
        return response
    except Exception as e:
        return server_error_response(e)
|
||||
|
||||
@@ -46,7 +46,7 @@ import styles from './index.module.less';
|
||||
// Resolve a matched citation string to its chunk index by delegating to
// parseCitationIndex.
const getChunkIndex = (match: string) => {
  return parseCitationIndex(match);
};
|
||||
|
||||
const isArtifactUrl = (url?: string) =>
|
||||
Boolean(url && url.includes('/document/artifact/'));
|
||||
Boolean(url && url.includes('/api/v1/documents/artifact/'));
|
||||
|
||||
const fetchArtifactBlob = async (url: string): Promise<Blob> => {
|
||||
const response = await request(url, {
|
||||
|
||||
Reference in New Issue
Block a user