Fix: dataset document download route (#14910)

### What problem does this PR solve?

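Fix the dataset document download route: the dataset document list in the web UI now downloads files through the dataset-scoped endpoint `GET /datasets/<dataset_id>/documents/<document_id>`, and the standalone `GET /documents/<document_id>` download route is removed.

### Type of change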

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
buua436
2026-05-14 10:59:06 +08:00
committed by GitHub
parent 1c0eaa504b
commit b89878c593
4 changed files with 41 additions and 58 deletions

View File

@@ -16,9 +16,10 @@
import logging
from io import BytesIO
from quart import request, send_file
from quart import send_file
from api.db.db_models import APIToken, Document, Task
from api.apps import login_required
from api.db.db_models import Document, Task
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService
@@ -27,7 +28,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
from api.db.services.tenant_llm_service import TenantLLMService
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required, add_tenant_id_to_kwargs
from common import settings
from common.constants import LLMType, RetCode, TaskStatus
from common.metadata_utils import convert_conditions, meta_filter
@@ -51,7 +52,8 @@ def _enrich_chunks_with_document_metadata(chunks: list[dict], metadata_fields=No
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"]) # noqa: F821
@token_required
@login_required
@add_tenant_id_to_kwargs
async def download(tenant_id, dataset_id, document_id):
"""
Download a document from a dataset.
@@ -90,8 +92,6 @@ async def download(tenant_id, dataset_id, document_id):
"""
if not document_id:
return get_error_data_result(message="Specify document_id please.")
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
return get_error_data_result(message=f"You do not own the dataset {dataset_id}.")
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
if not doc:
return get_error_data_result(message=f"The dataset not own the document {document_id}.")
@@ -110,51 +110,6 @@ async def download(tenant_id, dataset_id, document_id):
)
@manager.route("/documents/<document_id>", methods=["GET"]) # noqa: F821
async def download_doc(document_id):
    """
    Download a document by ID, authenticating with a beta API token.

    The token is taken from the ``Authorization`` header, which must consist of
    exactly two whitespace-separated parts; the second part is looked up with
    ``APIToken.query(beta=...)``. The document is served only when its knowledge
    base belongs to the tenant that owns the token.

    Args:
        document_id: ID of the document to download.

    Returns:
        The file contents as an attachment named after the document, or a JSON
        error result when the header is missing or malformed, the token is
        unknown or ambiguous, the document does not exist, the tenant does not
        own the document's knowledge base, or the stored file is empty.
    """
    # A missing header is handled like a malformed one; calling .split() on
    # the None returned by headers.get() would raise AttributeError instead.
    auth_header = request.headers.get("Authorization") or ""
    token = auth_header.split()
    if len(token) != 2:
        return get_error_data_result(message="Authorization is not valid!")
    token = token[1]
    logging.info("Beta API token lookup attempted for document download")
    objs = APIToken.query(beta=token)
    if not objs:
        logging.warning("Beta API token lookup failed for document download: invalid API key")
        return get_error_data_result(message='Authentication error: API key is invalid!"')
    if len(objs) > 1:
        logging.error("Beta API token lookup is ambiguous for document download: matches=%s", len(objs))
        return get_error_data_result(message="Authentication error: API key configuration is ambiguous.")
    tenant_id = objs[0].tenant_id
    logging.info("Beta API token authorized for document download: tenant_id=%s", tenant_id)

    if not document_id:
        return get_error_data_result(message="Specify document_id please.")
    doc = DocumentService.query(id=document_id)
    if not doc:
        return get_error_data_result(message=f"The dataset not own the document {document_id}.")
    # Reject documents whose knowledge base is not owned by the token's tenant.
    if not KnowledgebaseService.query(id=doc[0].kb_id, tenant_id=tenant_id):
        logging.warning(
            "cross-tenant access denied for document download: tenant_id=%s kb_id=%s document_id=%s",
            tenant_id,
            doc[0].kb_id,
            document_id,
        )
        return get_error_data_result(message="You do not have access to this document.")

    # The process of downloading
    doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id)  # minio address
    file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location)
    if not file_stream:
        return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
    file = BytesIO(file_stream)
    # Use send_file with a proper filename and MIME type
    return await send_file(
        file,
        as_attachment=True,
        attachment_filename=doc[0].name,
        mimetype="application/octet-stream",  # Set a default MIME type
    )
DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"

View File

@@ -8,9 +8,10 @@ import {
import { DocumentType } from '@/constants/knowledge';
import { useRemoveDocument } from '@/hooks/use-document-request';
import { IDocumentInfo } from '@/interfaces/database/document';
import { downloadDatasetDocument } from '@/services/file-manager-service';
import { formatFileSize } from '@/utils/common-util';
import { formatDate } from '@/utils/date';
import { downloadDocument } from '@/utils/file-util';
import { downloadFileFromBlob } from '@/utils/file-util';
import { Download, Eye, PenLine, Trash2 } from 'lucide-react';
import { useCallback } from 'react';
import { UseRenameDocumentShowType } from './use-rename-document';
@@ -34,12 +35,22 @@ export function DatasetActionCell({
const { removeDocument } = useRemoveDocument();
// Start a download of this document through the downloadDocument helper,
// passing the document id and the record's name as the filename.
const onDownloadDocument = useCallback(() => {
downloadDocument({
id,
filename: record.name,
});
}, [id, record.name]);
// Fetch this document through the dataset-scoped download service and hand
// the resulting blob to downloadFileFromBlob together with the record's name.
// Failures are logged to the console and not rethrown.
const onDownloadDocument = useCallback(async () => {
  try {
    // Use the text after the last dot as the extension. A name without any
    // dot has no extension, so fall back to 'bin' instead of sending the
    // whole file name (which is what split('.').pop() would return).
    const dotIndex = record.name.lastIndexOf('.');
    const ext =
      dotIndex >= 0
        ? record.name.slice(dotIndex + 1).toLowerCase() || 'bin'
        : 'bin';
    const response = await downloadDatasetDocument({
      datasetId: record.dataset_id,
      docId: id,
      ext,
    });
    const blob = new Blob([response.data], {
      type: response.data.type,
    });
    downloadFileFromBlob(blob, record.name);
  } catch (error) {
    console.error('Error downloading document:', error);
  }
}, [id, record.dataset_id, record.name]);
const handleRemove = useCallback(() => {
removeDocument(id);

View File

@@ -12,6 +12,7 @@ const {
getDocumentFile,
getFile,
moveFile,
getDatasetDocumentFileDownload,
getDocumentFileDownload,
} = api;
@@ -67,4 +68,18 @@ export const downloadFile = (data: { docId: string; ext: string }) => {
responseType: 'blob',
});
};
/**
 * Request the file of a document that belongs to a dataset.
 *
 * Issues a GET to the URL built by `getDatasetDocumentFileDownload` for the
 * given dataset and document, sends `ext` as a query parameter, and asks for
 * the response body as a blob.
 *
 * @param data - Dataset id, document id and file extension of the document.
 * @returns Whatever `request.get` returns for this call.
 */
export const downloadDatasetDocument = (data: {
  datasetId: string;
  docId: string;
  ext: string;
}) => {
  const { datasetId, docId, ext } = data;
  const url = getDatasetDocumentFileDownload(datasetId, docId);
  return request.get(url, { params: { ext }, responseType: 'blob' });
};
export default fileManagerService;

View File

@@ -126,6 +126,8 @@ export default {
`${restAPIv1}/datasets/${datasetId}/documents?type=empty`,
documentChangeParser: (datasetId: string, documentId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
getDatasetDocumentFileDownload: (datasetId: string, documentId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
documentThumbnails: `${restAPIv1}/thumbnails`,
getDocumentFile: `${restAPIv1}/documents`,
getDocumentFileDownload: (docId: string) =>