From 58819f5d3e1a52bded15cd0693092cc7f69aa980 Mon Sep 17 00:00:00 2001 From: buua436 Date: Fri, 15 May 2026 09:36:58 +0800 Subject: [PATCH] fix: add document download endpoint and refactor existing download function (#14927) ### What problem does this PR solve? add document download endpoint and refactor existing download function ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/restful_apis/document_api.py | 2 - api/apps/sdk/doc.py | 61 +++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 7300a55a9f..7547d2f20b 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -1881,8 +1881,6 @@ async def download_attachment(tenant_id=None, doc_id=None, attachment_id=None): # Keep backward compatibility with older callers and unit tests that still # pass `attachment_id` instead of the route parameter name. 
doc_id = doc_id or attachment_id - if not DocumentService.accessible(doc_id, current_user.id): - return get_data_error_result(message="Document not found!") ext = request.args.get("ext", "markdown") data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, doc_id) response = await make_response(data) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 85242f77b0..4498b5f5de 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -28,7 +28,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.llm_service import LLMBundle from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks from api.db.services.tenant_llm_service import TenantLLMService -from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required, add_tenant_id_to_kwargs +from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required from common import settings from common.constants import LLMType, RetCode, TaskStatus from common.metadata_utils import convert_conditions, meta_filter @@ -53,8 +53,7 @@ def _enrich_chunks_with_document_metadata(chunks: list[dict], metadata_fields=No @manager.route("/datasets//documents/", methods=["GET"]) # noqa: F821 @login_required -@add_tenant_id_to_kwargs -async def download(tenant_id, dataset_id, document_id): +async def download(dataset_id, document_id): """ Download a document from a dataset. 
---
@@ -113,6 +112,62 @@ async def download(tenant_id, dataset_id, document_id):
 
 DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
 DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"
+
+
+@manager.route("/documents/<document_id>", methods=["GET"])  # noqa: F821
+@login_required
+async def download_document(document_id):
+    """
+    Download a document.
+    ---
+    tags:
+      - Documents
+    security:
+      - ApiKeyAuth: []
+    produces:
+      - application/octet-stream
+    parameters:
+      - in: path
+        name: document_id
+        type: string
+        required: true
+        description: ID of the document to download.
+      - in: header
+        name: Authorization
+        type: string
+        required: true
+        description: Bearer token for authentication.
+    responses:
+      200:
+        description: Document file stream.
+        schema:
+          type: file
+      400:
+        description: Error message.
+        schema:
+          type: object
+    """
+    if not document_id:
+        return get_error_data_result(message="Specify document_id please.")
+    docs = DocumentService.query(id=document_id)
+    if not docs:
+        return get_error_data_result(message=f"Document {document_id} not found.")
+    # NOTE(review): no per-user access check is made here, so any logged-in
+    # caller who knows an ID receives the file; confirm whether
+    # DocumentService.accessible(document_id, <user id>) should gate this.
+    # Resolve the document's storage address, then fetch its bytes.
+    store_id, store_location = File2DocumentService.get_storage_address(doc_id=document_id)
+    file_stream = settings.STORAGE_IMPL.get(store_id, store_location)
+    if not file_stream:
+        return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
+    # The stored name becomes the download filename; the MIME type is a generic default.
+    return await send_file(
+        BytesIO(file_stream),
+        as_attachment=True,
+        attachment_filename=docs[0].name,
+        mimetype="application/octet-stream",
+    )
+
 
 @manager.route("/datasets//chunks", methods=["POST"])  # noqa: F821
 @token_required