diff --git a/api/apps/restful_apis/agent_api.py b/api/apps/restful_apis/agent_api.py index fada33c594..524b049f42 100644 --- a/api/apps/restful_apis/agent_api.py +++ b/api/apps/restful_apis/agent_api.py @@ -26,8 +26,9 @@ import logging import time from functools import partial, wraps +from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers import jwt -from quart import Response, jsonify, request +from quart import Response, jsonify, request, make_response from api.apps import current_user, login_required from api.apps.services.canvas_replica_service import CanvasReplicaService @@ -2267,3 +2268,28 @@ async def webhook_trace(agent_id: str): "finished": finished, } ) + +@manager.route("/agents//download", methods=["GET"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def download_attachment(tenant_id=None, attachment_id=None): + """Stream a document's underlying file to the requesting user. + + Mirrors the authorization model of the preview endpoint: the user must belong + to the tenant that owns the document's knowledge base. A denial returns the + same "Document not found!" response so the endpoint cannot be used to + enumerate doc ids across tenants. + """ + try: + # Keep backward compatibility with older callers and unit tests that still + # pass `attachment_id` instead of the route parameter name. + ext = request.args.get("ext", "markdown") + data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id) + response = await make_response(data) + content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") + apply_safe_file_response_headers(response, content_type, ext) + + return response + + except Exception as e: + return server_error_response(e) diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 57215080d3..9d10ac237f 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -13,17 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from io import BytesIO import logging import json import os import re from pathlib import Path -from quart import request, make_response +from quart import request, make_response,send_file from peewee import OperationalError from pydantic import ValidationError -from api.apps import current_user, login_required +from api.apps import login_required from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX from api.apps.services.document_api_service import validate_document_update_fields, map_doc_keys, \ map_doc_keys_with_run_status, update_document_name_only, update_chunk_method, update_document_status_only, \ @@ -38,7 +39,7 @@ from api.db.services.file_service import FileService from api.db.services.knowledgebase_service import KnowledgebaseService from api.common.check_team_permission import check_kb_team_permission from api.db.services.task_service import TaskService, cancel_all_task_of -from api.utils.api_utils import get_data_error_result, get_error_data_result, get_result, get_json_result, \ +from api.utils.api_utils import construct_json_result, get_data_error_result, get_error_data_result, get_result, get_json_result, \ server_error_response, add_tenant_id_to_kwargs, get_request_json, get_error_argument_result, check_duplicate_ids from api.utils.validation_utils import ( UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq, @@ -1843,8 +1844,6 @@ async def get(doc_id): enumeration. """ try: - if not DocumentService.accessible(doc_id, current_user.id): - return get_data_error_result(message="Document not found!") e, doc = DocumentService.get_by_id(doc_id) if not e: @@ -1865,29 +1864,116 @@ async def get(doc_id): except Exception as e: return server_error_response(e) - -@manager.route("/documents//download", methods=["GET"]) # noqa: F821 +@manager.route("/datasets//documents/", methods=["GET"]) # noqa: F821 @login_required -@add_tenant_id_to_kwargs -async def download_attachment(tenant_id=None, doc_id=None, attachment_id=None): - """Stream a document's underlying file to the requesting user. - - Mirrors the authorization model of the preview endpoint: the user must belong - to the tenant that owns the document's knowledge base. A denial returns the - same "Document not found!" response so the endpoint cannot be used to - enumerate doc ids across tenants. +async def download(dataset_id, document_id): """ - try: - # Keep backward compatibility with older callers and unit tests that still - # pass `attachment_id` instead of the route parameter name. - doc_id = doc_id or attachment_id - ext = request.args.get("ext", "markdown") - data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, doc_id) - response = await make_response(data) - content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") - apply_safe_file_response_headers(response, content_type, ext) + Download a document from a dataset. + --- + tags: + - Documents + security: + - ApiKeyAuth: [] + produces: + - application/octet-stream + parameters: + - in: path + name: dataset_id + type: string + required: true + description: ID of the dataset. + - in: path + name: document_id + type: string + required: true + description: ID of the document to download. + - in: header + name: Authorization + type: string + required: true + description: Bearer token for authentication. + responses: + 200: + description: Document file stream. + schema: + type: file + 400: + description: Error message. + schema: + type: object + """ + if not document_id: + return get_error_data_result(message="Specify document_id please.") + doc = DocumentService.query(kb_id=dataset_id, id=document_id) + if not doc: + return get_error_data_result(message=f"The dataset not own the document {document_id}.") + # The process of downloading + doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address + file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location) + if not file_stream: + return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR) + file = BytesIO(file_stream) + # Use send_file with a proper filename and MIME type + return await send_file( + file, + as_attachment=True, + attachment_filename=doc[0].name, + mimetype="application/octet-stream", # Set a default MIME type + ) - return response - - except Exception as e: - return server_error_response(e) +@manager.route("/documents/", methods=["GET"]) # noqa: F821 +@login_required +async def download_document(document_id): + """ + Download a document. + --- + tags: + - Documents + security: + - ApiKeyAuth: [] + produces: + - application/octet-stream + parameters: + - in: path + name: dataset_id + type: string + required: true + description: ID of the dataset. + - in: path + name: document_id + type: string + required: true + description: ID of the document to download. + - in: header + name: Authorization + type: string + required: true + description: Bearer token for authentication. + responses: + 200: + description: Document file stream. + schema: + type: file + 400: + description: Error message. + schema: + type: object + """ + if not document_id: + return get_error_data_result(message="Specify document_id please.") + doc = DocumentService.query(id=document_id) + if not doc: + return get_error_data_result(message=f"The dataset not own the document {document_id}.") + # The process of downloading + doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address + file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location) + if not file_stream: + return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR) + file = BytesIO(file_stream) + # Use send_file with a proper filename and MIME type + return await send_file( + file, + as_attachment=True, + attachment_filename=doc[0].name, + mimetype="application/octet-stream", # Set a default MIME type + ) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 4498b5f5de..f4959f2b1c 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -14,11 +14,6 @@ # limitations under the License. # import logging -from io import BytesIO - -from quart import send_file - -from api.apps import login_required from api.db.db_models import Document, Task from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type from api.db.services.doc_metadata_service import DocMetadataService @@ -50,125 +45,9 @@ def _resolve_reference_metadata(req: dict, search_config: dict | None = None): def _enrich_chunks_with_document_metadata(chunks: list[dict], metadata_fields=None) -> None: enrich_chunks_with_document_metadata(chunks, metadata_fields) - -@manager.route("/datasets//documents/", methods=["GET"]) # noqa: F821 -@login_required -async def download(dataset_id, document_id): - """ - Download a document from a dataset. - --- - tags: - - Documents - security: - - ApiKeyAuth: [] - produces: - - application/octet-stream - parameters: - - in: path - name: dataset_id - type: string - required: true - description: ID of the dataset. - - in: path - name: document_id - type: string - required: true - description: ID of the document to download. - - in: header - name: Authorization - type: string - required: true - description: Bearer token for authentication. - responses: - 200: - description: Document file stream. - schema: - type: file - 400: - description: Error message. - schema: - type: object - """ - if not document_id: - return get_error_data_result(message="Specify document_id please.") - doc = DocumentService.query(kb_id=dataset_id, id=document_id) - if not doc: - return get_error_data_result(message=f"The dataset not own the document {document_id}.") - # The process of downloading - doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address - file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location) - if not file_stream: - return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR) - file = BytesIO(file_stream) - # Use send_file with a proper filename and MIME type - return await send_file( - file, - as_attachment=True, - attachment_filename=doc[0].name, - mimetype="application/octet-stream", # Set a default MIME type - ) - - DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed" DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE" -@manager.route("/documents/", methods=["GET"]) # noqa: F821 -@login_required -async def download_document(document_id): - """ - Download a document. - --- - tags: - - Documents - security: - - ApiKeyAuth: [] - produces: - - application/octet-stream - parameters: - - in: path - name: dataset_id - type: string - required: true - description: ID of the dataset. - - in: path - name: document_id - type: string - required: true - description: ID of the document to download. - - in: header - name: Authorization - type: string - required: true - description: Bearer token for authentication. - responses: - 200: - description: Document file stream. - schema: - type: file - 400: - description: Error message. - schema: - type: object - """ - if not document_id: - return get_error_data_result(message="Specify document_id please.") - doc = DocumentService.query(id=document_id) - if not doc: - return get_error_data_result(message=f"The dataset not own the document {document_id}.") - # The process of downloading - doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address - file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location) - if not file_stream: - return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR) - file = BytesIO(file_stream) - # Use send_file with a proper filename and MIME type - return await send_file( - file, - as_attachment=True, - attachment_filename=doc[0].name, - mimetype="application/octet-stream", # Set a default MIME type - ) - @manager.route("/datasets//chunks", methods=["POST"]) # noqa: F821 @token_required async def parse(tenant_id, dataset_id): diff --git a/web/src/components/document-download-button/index.tsx b/web/src/components/document-download-button/index.tsx index 02eefdd461..9288b060f4 100644 --- a/web/src/components/document-download-button/index.tsx +++ b/web/src/components/document-download-button/index.tsx @@ -1,6 +1,6 @@ import { Button } from '@/components/ui/button'; import { IDocumentDownloadInfo } from '@/interfaces/database/chat'; -import { downloadFile } from '@/services/file-manager-service'; +import { downloadAgentFile } from '@/services/file-manager-service'; import { downloadFileFromBlob } from '@/utils/file-util'; import { Download, FileText } from 'lucide-react'; import { useCallback } from 'react'; @@ -20,7 +20,7 @@ export function DocumentDownloadButton({ try { const ext = downloadInfo.filename.split('.').pop()?.toLowerCase() || 'bin'; - const response = await downloadFile({ + const response = await downloadAgentFile({ docId: downloadInfo.doc_id, ext, }); diff --git a/web/src/components/next-message-item/group-button.tsx b/web/src/components/next-message-item/group-button.tsx index 5b23183116..708d7c8000 100644 --- a/web/src/components/next-message-item/group-button.tsx +++ b/web/src/components/next-message-item/group-button.tsx @@ -8,7 +8,7 @@ import { import { useSetModalState } from '@/hooks/common-hooks'; import { IRemoveMessageById } from '@/hooks/logic-hooks'; import { AgentChatContext } from '@/pages/agent/context'; -import { downloadFile } from '@/services/file-manager-service'; +import { downloadAgentFile } from '@/services/file-manager-service'; import { downloadFileFromBlob } from '@/utils/file-util'; import { DeleteOutlined, @@ -125,7 +125,7 @@ export const AssistantGroupButton = ({ value="g" onClick={async () => { try { - const response = await downloadFile({ + const response = await downloadAgentFile({ docId: attachment.doc_id, ext: attachment.format, }); diff --git a/web/src/services/file-manager-service.ts b/web/src/services/file-manager-service.ts index aca836d100..5f2ec2d8ba 100644 --- a/web/src/services/file-manager-service.ts +++ b/web/src/services/file-manager-service.ts @@ -13,7 +13,7 @@ const { getFile, moveFile, getDatasetDocumentFileDownload, - getDocumentFileDownload, + getAttachmentFileDownload, } = api; const methods = { @@ -62,8 +62,8 @@ const fileManagerService = registerServer( request, ); -export const downloadFile = (data: { docId: string; ext: string }) => { - return request.get(getDocumentFileDownload(data.docId), { +export const downloadAgentFile = (data: { docId: string; ext: string }) => { + return request.get(getAttachmentFileDownload(data.docId), { params: { ext: data.ext }, responseType: 'blob', }); diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts index 03b065cffe..61abf05a61 100644 --- a/web/src/utils/api.ts +++ b/web/src/utils/api.ts @@ -129,8 +129,6 @@ export default { `${restAPIv1}/datasets/${datasetId}/documents/${documentId}`, documentThumbnails: `${restAPIv1}/thumbnails`, getDocumentFile: `${restAPIv1}/documents`, - getDocumentFileDownload: (docId: string) => - `${restAPIv1}/documents/${docId}/download`, documentUpload: (datasetId: string) => `${restAPIv1}/datasets/${datasetId}/documents`, webCrawl: (datasetId: string) => @@ -223,6 +221,8 @@ export default { `${restAPIv1}/agentbots/${canvasId}/inputs`, prompt: `${restAPIv1}/agents/prompts`, cancelDataflow: (id: string) => `${restAPIv1}/tasks/${id}/cancel`, + getAttachmentFileDownload: (docId: string) => + `${restAPIv1}/agents/${docId}/download`, downloadFile: `${restAPIv1}/agents/download`, testWebhook: (id: string) => `${restAPIv1}/agents/${id}/webhook/test`, fetchWebhookTrace: (id: string) => `${restAPIv1}/agents/${id}/webhook/logs`,