mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix: dataset document download route (#14910)
### What problem does this PR solve?

Fixes the dataset document download route.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -16,9 +16,10 @@
|
||||
import logging
|
||||
from io import BytesIO
|
||||
|
||||
from quart import request, send_file
|
||||
from quart import send_file
|
||||
|
||||
from api.db.db_models import APIToken, Document, Task
|
||||
from api.apps import login_required
|
||||
from api.db.db_models import Document, Task
|
||||
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type
|
||||
from api.db.services.doc_metadata_service import DocMetadataService
|
||||
from api.db.services.document_service import DocumentService
|
||||
@@ -27,7 +28,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required, add_tenant_id_to_kwargs
|
||||
from common import settings
|
||||
from common.constants import LLMType, RetCode, TaskStatus
|
||||
from common.metadata_utils import convert_conditions, meta_filter
|
||||
@@ -51,7 +52,8 @@ def _enrich_chunks_with_document_metadata(chunks: list[dict], metadata_fields=No
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
@login_required
|
||||
@add_tenant_id_to_kwargs
|
||||
async def download(tenant_id, dataset_id, document_id):
|
||||
"""
|
||||
Download a document from a dataset.
|
||||
@@ -90,8 +92,6 @@ async def download(tenant_id, dataset_id, document_id):
|
||||
"""
|
||||
if not document_id:
|
||||
return get_error_data_result(message="Specify document_id please.")
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(message=f"You do not own the dataset {dataset_id}.")
|
||||
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
|
||||
if not doc:
|
||||
return get_error_data_result(message=f"The dataset not own the document {document_id}.")
|
||||
@@ -110,51 +110,6 @@ async def download(tenant_id, dataset_id, document_id):
|
||||
)
|
||||
|
||||
|
||||
@manager.route("/documents/<document_id>", methods=["GET"]) # noqa: F821
|
||||
async def download_doc(document_id):
|
||||
token = request.headers.get("Authorization").split()
|
||||
if len(token) != 2:
|
||||
return get_error_data_result(message="Authorization is not valid!")
|
||||
token = token[1]
|
||||
logging.info("Beta API token lookup attempted for document download")
|
||||
objs = APIToken.query(beta=token)
|
||||
if not objs:
|
||||
logging.warning("Beta API token lookup failed for document download: invalid API key")
|
||||
return get_error_data_result(message='Authentication error: API key is invalid!"')
|
||||
if len(objs) > 1:
|
||||
logging.error("Beta API token lookup is ambiguous for document download: matches=%s", len(objs))
|
||||
return get_error_data_result(message="Authentication error: API key configuration is ambiguous.")
|
||||
tenant_id = objs[0].tenant_id
|
||||
logging.info("Beta API token authorized for document download: tenant_id=%s", tenant_id)
|
||||
|
||||
if not document_id:
|
||||
return get_error_data_result(message="Specify document_id please.")
|
||||
doc = DocumentService.query(id=document_id)
|
||||
if not doc:
|
||||
return get_error_data_result(message=f"The dataset not own the document {document_id}.")
|
||||
if not KnowledgebaseService.query(id=doc[0].kb_id, tenant_id=tenant_id):
|
||||
logging.warning(
|
||||
"cross-tenant access denied for document download: tenant_id=%s kb_id=%s document_id=%s",
|
||||
tenant_id,
|
||||
doc[0].kb_id,
|
||||
document_id,
|
||||
)
|
||||
return get_error_data_result(message="You do not have access to this document.")
|
||||
# The process of downloading
|
||||
doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address
|
||||
file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location)
|
||||
if not file_stream:
|
||||
return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
|
||||
file = BytesIO(file_stream)
|
||||
# Use send_file with a proper filename and MIME type
|
||||
return await send_file(
|
||||
file,
|
||||
as_attachment=True,
|
||||
attachment_filename=doc[0].name,
|
||||
mimetype="application/octet-stream", # Set a default MIME type
|
||||
)
|
||||
|
||||
|
||||
DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
|
||||
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"
|
||||
|
||||
|
||||
@@ -8,9 +8,10 @@ import {
|
||||
import { DocumentType } from '@/constants/knowledge';
|
||||
import { useRemoveDocument } from '@/hooks/use-document-request';
|
||||
import { IDocumentInfo } from '@/interfaces/database/document';
|
||||
import { downloadDatasetDocument } from '@/services/file-manager-service';
|
||||
import { formatFileSize } from '@/utils/common-util';
|
||||
import { formatDate } from '@/utils/date';
|
||||
import { downloadDocument } from '@/utils/file-util';
|
||||
import { downloadFileFromBlob } from '@/utils/file-util';
|
||||
import { Download, Eye, PenLine, Trash2 } from 'lucide-react';
|
||||
import { useCallback } from 'react';
|
||||
import { UseRenameDocumentShowType } from './use-rename-document';
|
||||
@@ -34,12 +35,22 @@ export function DatasetActionCell({
|
||||
|
||||
const { removeDocument } = useRemoveDocument();
|
||||
|
||||
const onDownloadDocument = useCallback(() => {
|
||||
downloadDocument({
|
||||
id,
|
||||
filename: record.name,
|
||||
});
|
||||
}, [id, record.name]);
|
||||
const onDownloadDocument = useCallback(async () => {
|
||||
try {
|
||||
const ext = record.name.split('.').pop()?.toLowerCase() || 'bin';
|
||||
const response = await downloadDatasetDocument({
|
||||
datasetId: record.dataset_id,
|
||||
docId: id,
|
||||
ext,
|
||||
});
|
||||
const blob = new Blob([response.data], {
|
||||
type: response.data.type,
|
||||
});
|
||||
downloadFileFromBlob(blob, record.name);
|
||||
} catch (error) {
|
||||
console.error('Error downloading document:', error);
|
||||
}
|
||||
}, [id, record.dataset_id, record.name]);
|
||||
|
||||
const handleRemove = useCallback(() => {
|
||||
removeDocument(id);
|
||||
|
||||
@@ -12,6 +12,7 @@ const {
|
||||
getDocumentFile,
|
||||
getFile,
|
||||
moveFile,
|
||||
getDatasetDocumentFileDownload,
|
||||
getDocumentFileDownload,
|
||||
} = api;
|
||||
|
||||
@@ -67,4 +68,18 @@ export const downloadFile = (data: { docId: string; ext: string }) => {
|
||||
responseType: 'blob',
|
||||
});
|
||||
};
|
||||
|
||||
export const downloadDatasetDocument = (data: {
|
||||
datasetId: string;
|
||||
docId: string;
|
||||
ext: string;
|
||||
}) => {
|
||||
return request.get(
|
||||
getDatasetDocumentFileDownload(data.datasetId, data.docId),
|
||||
{
|
||||
params: { ext: data.ext },
|
||||
responseType: 'blob',
|
||||
},
|
||||
);
|
||||
};
|
||||
export default fileManagerService;
|
||||
|
||||
@@ -126,6 +126,8 @@ export default {
|
||||
`${restAPIv1}/datasets/${datasetId}/documents?type=empty`,
|
||||
documentChangeParser: (datasetId: string, documentId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
||||
getDatasetDocumentFileDownload: (datasetId: string, documentId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
||||
documentThumbnails: `${restAPIv1}/thumbnails`,
|
||||
getDocumentFile: `${restAPIv1}/documents`,
|
||||
getDocumentFileDownload: (docId: string) =>
|
||||
|
||||
Reference in New Issue
Block a user