mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
fix: add document download endpoint and refactor existing download function (#14927)
### What problem does this PR solve?

Add a document download endpoint and refactor the existing download function.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -1881,8 +1881,6 @@ async def download_attachment(tenant_id=None, doc_id=None, attachment_id=None):
|
||||
# Keep backward compatibility with older callers and unit tests that still
|
||||
# pass `attachment_id` instead of the route parameter name.
|
||||
doc_id = doc_id or attachment_id
|
||||
if not DocumentService.accessible(doc_id, current_user.id):
|
||||
return get_data_error_result(message="Document not found!")
|
||||
ext = request.args.get("ext", "markdown")
|
||||
data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, doc_id)
|
||||
response = await make_response(data)
|
||||
|
||||
@@ -28,7 +28,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required, add_tenant_id_to_kwargs
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
|
||||
from common import settings
|
||||
from common.constants import LLMType, RetCode, TaskStatus
|
||||
from common.metadata_utils import convert_conditions, meta_filter
|
||||
@@ -53,8 +53,7 @@ def _enrich_chunks_with_document_metadata(chunks: list[dict], metadata_fields=No
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"]) # noqa: F821
|
||||
@login_required
|
||||
@add_tenant_id_to_kwargs
|
||||
async def download(tenant_id, dataset_id, document_id):
|
||||
async def download(dataset_id, document_id):
|
||||
"""
|
||||
Download a document from a dataset.
|
||||
---
|
||||
@@ -113,6 +112,62 @@ async def download(tenant_id, dataset_id, document_id):
|
||||
DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
|
||||
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"
|
||||
|
||||
@manager.route("/documents/<document_id>", methods=["GET"])  # noqa: F821
@login_required
async def download_document(document_id):
    """
    Download a document.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    produces:
      - application/octet-stream
    parameters:
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document to download.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Document file stream.
        schema:
          type: file
      400:
        description: Error message.
        schema:
          type: object
    """
    if not document_id:
        return get_error_data_result(message="Specify document_id please.")

    docs = DocumentService.query(id=document_id)
    if not docs:
        # This route is not scoped to a dataset, so report the missing
        # document itself rather than a dataset-ownership problem.
        return get_error_data_result(message=f"Document {document_id} not found.")

    # NOTE(review): no per-user access check is visible in this handler beyond
    # @login_required -- confirm that authorization for `document_id` is
    # enforced elsewhere, or add an explicit check before serving the file.

    # Resolve where the document's bytes live in the storage backend
    # (the original comment calls this the "minio address").
    storage_id, storage_location = File2DocumentService.get_storage_address(doc_id=document_id)

    # NOTE(review): this is a synchronous call inside an async handler;
    # consider off-loading it to a worker thread if the backend read can block.
    file_stream = settings.STORAGE_IMPL.get(storage_id, storage_location)
    if not file_stream:
        return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)

    # Wrap the raw bytes in a file-like object and send it as an attachment
    # under the document's stored name with a generic binary MIME type.
    return await send_file(
        BytesIO(file_stream),
        as_attachment=True,
        attachment_filename=docs[0].name,
        mimetype="application/octet-stream",  # Set a default MIME type
    )
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/chunks", methods=["POST"]) # noqa: F821
|
||||
@token_required
|
||||
|
||||
Reference in New Issue
Block a user