mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Refa: migrate document preview/download to RESTful API (#14633)
### What problem does this PR solve?

Migrate the document preview and download endpoints to the RESTful API.

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -29,6 +29,8 @@ Deprecated APIs and their replacements:
|
||||
- POST /api/v1/file/convert -> POST /api/v1/files/link-to-datasets
|
||||
- GET /api/v1/file/* -> GET /api/v1/files*
|
||||
- POST /api/v1/file/* -> POST /api/v1/files*
|
||||
- GET /api/v1/document/get/{doc_id} -> GET /api/v1/documents/{doc_id}/preview
|
||||
- GET /api/v1/document/download/{doc_id} -> GET /api/v1/documents/{doc_id}/download
|
||||
- POST /api/v1/sessions/related_questions -> POST /api/v1/chat/recommandation
|
||||
- PUT (chunk update) -> PATCH (chunk update)
|
||||
"""
|
||||
@@ -394,6 +396,44 @@ async def deprecated_file_upload_info():
|
||||
tenant_id = current_user.id
|
||||
return await document_api.upload_info(tenant_id=tenant_id)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Document APIs
|
||||
# =============================================================================
|
||||
|
||||
@manager.route("/document/get/<doc_id>", methods=["GET"])
@login_required
async def deprecated_document_get(doc_id):
    """
    Deprecated shim for the document preview endpoint.

    Logs a deprecation warning and forwards the request unchanged to
    ``document_api.get``.

    Old path: GET /api/v1/document/get/{doc_id}
    New path: GET /api/v1/documents/{doc_id}/preview

    Args:
        doc_id: Document identifier taken from the URL path.

    Returns:
        Whatever ``document_api.get`` returns for this document.
    """
    # The id is interpolated twice: once into the old path, once into the new.
    deprecation_notice = (
        "API endpoint /api/v1/document/get/%s is deprecated. "
        "Please use /api/v1/documents/%s/preview instead."
    )
    logging.warning(deprecation_notice, doc_id, doc_id)

    preview_response = await document_api.get(doc_id)
    return preview_response
|
||||
|
||||
|
||||
@manager.route("/document/download/<doc_id>", methods=["GET"])
@login_required
async def deprecated_document_download(doc_id):
    """
    Deprecated shim for the document download endpoint.

    Logs a deprecation warning and forwards the request to
    ``document_api.download_attachment``, passing the id by keyword.

    Old path: GET /api/v1/document/download/{doc_id}
    New path: GET /api/v1/documents/{doc_id}/download

    Args:
        doc_id: Document identifier taken from the URL path.

    Returns:
        Whatever ``document_api.download_attachment`` returns.
    """
    # The id is interpolated twice: once into the old path, once into the new.
    deprecation_notice = (
        "API endpoint /api/v1/document/download/%s is deprecated. "
        "Please use /api/v1/documents/%s/download instead."
    )
    logging.warning(deprecation_notice, doc_id, doc_id)

    download_response = await document_api.download_attachment(doc_id=doc_id)
    return download_response
|
||||
|
||||
# =============================================================================
|
||||
# Agent Chat API
|
||||
# =============================================================================
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
#
|
||||
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License
|
||||
#
|
||||
import re
|
||||
|
||||
from quart import make_response, request
|
||||
|
||||
from api.apps import current_user, login_required
|
||||
from api.db import FileType
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.utils.api_utils import (
|
||||
get_data_error_result,
|
||||
server_error_response,
|
||||
)
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
|
||||
from common import settings
|
||||
from common.misc_utils import thread_pool_exec
|
||||
|
||||
|
||||
@manager.route("/get/<doc_id>", methods=["GET"])  # noqa: F821
@login_required
async def get(doc_id):
    """Return the stored bytes of a document with file-response headers applied.

    The document is looked up by id, its storage address is resolved through
    ``File2DocumentService``, and the blob is fetched off the event loop via
    ``thread_pool_exec``. The content type is derived from the lower-cased
    file-name extension; unknown extensions fall back to ``image/<ext>`` for
    visual documents and ``application/<ext>`` otherwise.

    Args:
        doc_id: Document identifier taken from the URL path.

    Returns:
        The file response, a data-error result when the document does not
        exist, or a server-error result when anything raises.
    """
    try:
        found, document = DocumentService.get_by_id(doc_id)
        if not found:
            return get_data_error_result(message="Document not found!")

        # NOTE(review): the pair is presumably (bucket, object name) - confirm
        # against File2DocumentService.get_storage_address.
        bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id)
        blob = await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, name)
        response = await make_response(blob)

        # Everything after the final dot of the lower-cased name is the extension.
        suffix_match = re.search(r"\.([^.]+)$", document.name.lower())
        if suffix_match is None:
            extension = None
            content_type = None
        else:
            extension = suffix_match.group(1)
            default_family = "application"
            if document.type == FileType.VISUAL.value:
                default_family = "image"
            content_type = CONTENT_TYPE_MAP.get(extension, f"{default_family}/{extension}")

        apply_safe_file_response_headers(response, content_type, extension)
        return response
    except Exception as exc:
        return server_error_response(exc)
|
||||
|
||||
|
||||
@manager.route("/download/<attachment_id>", methods=["GET"])  # noqa: F821
@login_required
async def download_attachment(attachment_id):
    """Return an attachment stored under the current user's id.

    The ``ext`` query parameter (default ``"markdown"``) selects the
    Content-Type through ``CONTENT_TYPE_MAP``; extensions missing from the map
    fall back to ``application/<ext>``.

    Args:
        attachment_id: Storage key of the attachment, taken from the URL path.

    Returns:
        The file response, or a server-error result when anything raises.
    """
    try:
        extension = request.args.get("ext", "markdown")
        blob = await thread_pool_exec(settings.STORAGE_IMPL.get, current_user.id, attachment_id)
        response = await make_response(blob)

        mime_type = CONTENT_TYPE_MAP.get(extension, f"application/{extension}")
        apply_safe_file_response_headers(response, mime_type, extension)
        return response
    except Exception as exc:
        return server_error_response(exc)
|
||||
|
||||
@@ -33,6 +33,7 @@ from api.db.services import duplicate_name
|
||||
from api.db.services.doc_metadata_service import DocMetadataService
|
||||
from api.db.db_models import Task
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.db.services.file_service import FileService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.common.check_team_permission import check_kb_team_permission
|
||||
@@ -48,7 +49,7 @@ from common.constants import ParserType, RetCode, TaskStatus, SANDBOX_ARTIFACT_B
|
||||
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
|
||||
from common.misc_utils import get_uuid, thread_pool_exec
|
||||
from api.utils.file_utils import filename_type, thumbnail
|
||||
from api.utils.web_utils import html2pdf, is_valid_url, apply_safe_file_response_headers
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url, apply_safe_file_response_headers
|
||||
from common.ssrf_guard import assert_url_is_safe
|
||||
from rag.nlp import search
|
||||
|
||||
@@ -1854,3 +1855,46 @@ async def batch_update_document_status(tenant_id, dataset_id):
|
||||
if has_error:
|
||||
return get_json_result(data=result, message="Partial failure", code=RetCode.SERVER_ERROR)
|
||||
return get_json_result(data=result)
|
||||
|
||||
@manager.route("/documents/<doc_id>/preview", methods=["GET"])  # noqa: F821
@login_required
async def get(doc_id):
    """Return the stored bytes of a document so a client can preview it.

    The document is looked up by id, its storage address is resolved through
    ``File2DocumentService``, and the blob is fetched off the event loop via
    ``thread_pool_exec``. The content type is derived from the lower-cased
    file-name extension; unknown extensions fall back to ``image/<ext>`` for
    visual documents and ``application/<ext>`` otherwise.

    Args:
        doc_id: Document identifier taken from the URL path.

    Returns:
        The file response, a data-error result when the document does not
        exist, or a server-error result when anything raises.
    """
    try:
        found, document = DocumentService.get_by_id(doc_id)
        if not found:
            return get_data_error_result(message="Document not found!")

        # NOTE(review): the pair is presumably (bucket, object name) - confirm
        # against File2DocumentService.get_storage_address.
        bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id)
        blob = await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, name)
        response = await make_response(blob)

        # Everything after the final dot of the lower-cased name is the extension.
        suffix_match = re.search(r"\.([^.]+)$", document.name.lower())
        if suffix_match is None:
            extension = None
            content_type = None
        else:
            extension = suffix_match.group(1)
            default_family = "application"
            if document.type == FileType.VISUAL.value:
                default_family = "image"
            content_type = CONTENT_TYPE_MAP.get(extension, f"{default_family}/{extension}")

        apply_safe_file_response_headers(response, content_type, extension)
        return response
    except Exception as exc:
        return server_error_response(exc)
|
||||
|
||||
|
||||
@manager.route("/documents/<doc_id>/download", methods=["GET"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def download_attachment(tenant_id=None, doc_id=None, attachment_id=None):
    """Return an attachment stored under the caller's tenant.

    The ``ext`` query parameter (default ``"markdown"``) selects the
    Content-Type through ``CONTENT_TYPE_MAP``; extensions missing from the map
    fall back to ``application/<ext>``.

    Args:
        tenant_id: Tenant whose storage is read. NOTE(review): presumably
            injected by ``add_tenant_id_to_kwargs`` - confirm.
        doc_id: Identifier from the URL path; used as the storage key.
        attachment_id: Legacy alias for ``doc_id``, honoured only when
            ``doc_id`` is empty.

    Returns:
        The file response, a data-error result when no identifier was
        supplied, or a server-error result when anything raises.
    """
    try:
        # Keep backward compatibility with older callers and unit tests that still
        # pass `attachment_id` instead of the route parameter name.
        doc_id = doc_id or attachment_id
        if not doc_id:
            # Both identifiers default to None; fail fast rather than asking
            # the storage backend for a None key.
            return get_data_error_result(message="Document ID is required.")

        ext = request.args.get("ext", "markdown")
        data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, doc_id)
        response = await make_response(data)
        content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
        apply_safe_file_response_headers(response, content_type, ext)

        return response

    except Exception as e:
        return server_error_response(e)
|
||||
|
||||
@@ -6879,14 +6879,18 @@ Failure:
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
```
|
||||
curl --request GET \
|
||||
--url 'http://{address}/api/v1/documents/{doc_id}/download?ext=pdf' \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>' \
|
||||
--output ./downloaded_attachment.pdf
|
||||
```
|
||||
|
||||
##### Request parameters
|
||||
|
||||
- `doc_id`: (*Path parameter*), `string`, *Required*
|
||||
The document ID whose attachment should be downloaded.
|
||||
- `ext`: (*Query parameter*), `string`, *Optional*
|
||||
A file extension hint specifying the response's Content-Type. Defaults to `"markdown"`. Available values:
|
||||
A file extension hint specifying the response's Content-Type. Defaults to `"markdown"`. Available values:
|
||||
- `"markdown"`
|
||||
- `"html"`
|
||||
- `"pdf"`
|
||||
@@ -6896,15 +6900,15 @@ Downloads a runtime attachment previously uploaded via the [Upload document](#up
|
||||
|
||||
#### Response
|
||||
|
||||
Success:
|
||||
Success:
|
||||
|
||||
Returns the file content as a binary stream with the relevant Content-Type header.
|
||||
|
||||
Failure:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 500,
|
||||
{
|
||||
"code": 500,
|
||||
"message": "Internal server error"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -26,7 +26,6 @@ from utils.file_utils import create_txt_file
|
||||
HEADERS = {"Content-Type": "application/json"}
|
||||
|
||||
DATASETS_URL = f"/api/{VERSION}/datasets"
|
||||
DOCUMENT_APP_URL = f"/{VERSION}/document"
|
||||
CHUNK_APP_URL = f"/{VERSION}/chunk"
|
||||
CHUNK_API_URL = f"/api/{VERSION}/datasets/{{dataset_id}}/documents/{{document_id}}/chunks"
|
||||
# SESSION_WITH_CHAT_ASSISTANT_API_URL = "/api/v1/chats/{chat_id}/sessions"
|
||||
@@ -404,10 +403,33 @@ def document_infos(auth, dataset_id, params=None, payload=None, *, headers=HEADE
|
||||
|
||||
|
||||
def document_metadata_summary(auth, payload=None, *, headers=HEADERS, data=None):
    """Fetch the metadata summary for a dataset through the RESTful endpoint.

    The legacy payload shape is still accepted: ``kb_id`` names the dataset and
    the optional ``doc_ids`` list is sent as a comma-separated ``doc_ids``
    query parameter. No request is issued when ``kb_id`` is missing.

    Args:
        auth: Auth object forwarded to ``requests``.
        payload: Optional dict with ``kb_id`` and, optionally, ``doc_ids``.
        headers: Request headers; defaults to the module-level ``HEADERS``.
        data: Optional raw request body forwarded to ``requests``.

    Returns:
        The decoded JSON response, or a ``{"code": 101, ...}`` dict when
        ``kb_id`` is absent.
    """
    payload = payload or {}
    dataset_id = payload.get("kb_id")
    if not dataset_id:
        # Validate before touching the network; the previous body fired a
        # stale POST at the retired document-app URL first and discarded it.
        return {"code": 101, "message": "KB ID is required"}

    doc_ids = payload.get("doc_ids")
    params = {}
    if doc_ids:
        params["doc_ids"] = ",".join(doc_ids)

    res = requests.get(
        url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/metadata/summary",
        headers=headers,
        auth=auth,
        params=params,
        data=data,
    )
    return res.json()
|
||||
|
||||
|
||||
def document_get(auth, document_id, *, headers=HEADERS, data=None):
    """Issue a GET against the document preview endpoint.

    Args:
        auth: Auth object forwarded to ``requests``.
        document_id: Identifier placed in the URL path.
        headers: Request headers; defaults to the module-level ``HEADERS``.
        data: Optional raw request body forwarded to ``requests``.

    Returns:
        The raw ``requests`` response (not decoded), so callers can inspect
        the body and headers themselves.
    """
    preview_url = f"{HOST_ADDRESS}/api/{VERSION}/documents/{document_id}/preview"
    return requests.get(url=preview_url, headers=headers, auth=auth, data=data)
|
||||
|
||||
|
||||
def document_download(auth, attachment_id, *, ext="markdown", headers=HEADERS, data=None):
    """Issue a GET against the document download endpoint.

    Args:
        auth: Auth object forwarded to ``requests``.
        attachment_id: Identifier placed in the URL path.
        ext: Value sent as the ``ext`` query parameter.
        headers: Request headers; defaults to the module-level ``HEADERS``.
        data: Optional raw request body forwarded to ``requests``.

    Returns:
        The raw ``requests`` response (not decoded).
    """
    download_url = f"{HOST_ADDRESS}/api/{VERSION}/documents/{attachment_id}/download"
    query = {"ext": ext}
    return requests.get(url=download_url, headers=headers, auth=auth, params=query, data=data)
|
||||
|
||||
|
||||
def document_metadata_update(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
|
||||
"""New unified API for updating document metadata.
|
||||
|
||||
|
||||
@@ -126,11 +126,31 @@ def document_app_module(monkeypatch):
|
||||
monkeypatch.setitem(sys.modules, "xgboost", ModuleType("xgboost"))
|
||||
|
||||
stub_apps = ModuleType("api.apps")
|
||||
stub_apps.__path__ = [str(repo_root / "api" / "apps")]
|
||||
stub_apps.current_user = SimpleNamespace(id="user-1")
|
||||
stub_apps.login_required = lambda func: func
|
||||
monkeypatch.setitem(sys.modules, "api.apps", stub_apps)
|
||||
|
||||
module_path = repo_root / "api" / "apps" / "document_app.py"
|
||||
stub_apps_services = ModuleType("api.apps.services")
|
||||
stub_apps_services.__path__ = [str(repo_root / "api" / "apps" / "services")]
|
||||
monkeypatch.setitem(sys.modules, "api.apps.services", stub_apps_services)
|
||||
|
||||
document_api_service_mod = ModuleType("api.apps.services.document_api_service")
|
||||
document_api_service_mod.validate_document_update_fields = lambda *_args, **_kwargs: (None, None)
|
||||
document_api_service_mod.map_doc_keys = lambda doc: doc.to_dict() if hasattr(doc, "to_dict") else doc
|
||||
|
||||
def _map_doc_keys_with_run_status(doc, run_status="0"):
|
||||
payload = doc if isinstance(doc, dict) else doc.to_dict()
|
||||
return {**payload, "run": run_status}
|
||||
|
||||
document_api_service_mod.map_doc_keys_with_run_status = _map_doc_keys_with_run_status
|
||||
document_api_service_mod.update_document_name_only = lambda *_args, **_kwargs: None
|
||||
document_api_service_mod.update_chunk_method = lambda *_args, **_kwargs: None
|
||||
document_api_service_mod.update_document_status_only = lambda *_args, **_kwargs: None
|
||||
document_api_service_mod.reset_document_for_reparse = lambda *_args, **_kwargs: None
|
||||
monkeypatch.setitem(sys.modules, "api.apps.services.document_api_service", document_api_service_mod)
|
||||
|
||||
module_path = repo_root / "api" / "apps" / "restful_apis" / "document_api.py"
|
||||
spec = importlib.util.spec_from_file_location("test_document_app_unit", module_path)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
module.manager = _DummyManager()
|
||||
|
||||
@@ -394,7 +394,7 @@ class TestDocumentMetadataUnit:
|
||||
"apply_safe_file_response_headers",
|
||||
lambda response, content_type, extension: response.headers.update({"content_type": content_type, "extension": extension}),
|
||||
)
|
||||
res = _run(module.download_attachment("att1"))
|
||||
res = _run(module.download_attachment(attachment_id="att1"))
|
||||
assert isinstance(res, _DummyResponse)
|
||||
assert res.data == b"attachment"
|
||||
assert res.headers["content_type"] == "application/abc"
|
||||
@@ -405,7 +405,7 @@ class TestDocumentMetadataUnit:
|
||||
|
||||
monkeypatch.setattr(module, "thread_pool_exec", raise_error)
|
||||
monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)})
|
||||
res = _run(module.download_attachment("att1"))
|
||||
res = _run(module.download_attachment(attachment_id="att1"))
|
||||
assert res["code"] == 500
|
||||
assert "download boom" in res["message"]
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { Authorization } from '@/constants/authorization';
|
||||
import { useGetKnowledgeSearchParams } from '@/hooks/route-hook';
|
||||
import { useGetPipelineResultSearchParams } from '@/pages/dataflow-result/hooks';
|
||||
import api, { webAPI } from '@/utils/api';
|
||||
import api, { restAPIv1 } from '@/utils/api';
|
||||
import { getAuthorization } from '@/utils/authorization-util';
|
||||
import jsPreviewExcel from '@js-preview/excel';
|
||||
import { useSize } from 'ahooks';
|
||||
@@ -57,7 +57,7 @@ export const useGetDocumentUrl = (isAgent: boolean) => {
|
||||
if (isAgent) {
|
||||
return api.downloadFile + `?id=${id}&created_by=${createdBy}`;
|
||||
}
|
||||
return `${webAPI}/document/get/${documentId}`;
|
||||
return `${restAPIv1}/documents/${documentId}/preview`;
|
||||
}, [createdBy, documentId, id, isAgent]);
|
||||
|
||||
return url;
|
||||
|
||||
@@ -26,8 +26,7 @@ import kbService, {
|
||||
uploadDocument,
|
||||
webCrawlDocument,
|
||||
} from '@/services/knowledge-service';
|
||||
import { restAPIv1, webAPI } from '@/utils/api';
|
||||
import { getSearchValue } from '@/utils/common-util';
|
||||
import { restAPIv1 } from '@/utils/api';
|
||||
import { buildChunkHighlights } from '@/utils/document-util';
|
||||
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
|
||||
import { useDebounce } from 'ahooks';
|
||||
@@ -214,6 +213,7 @@ export const useGetDocumentFilter = (): {
|
||||
const { id } = useParams();
|
||||
const debouncedSearchString = useDebounce(searchString, { wait: 500 });
|
||||
const [open, setOpen] = useState<number>(0);
|
||||
const datasetId = knowledgeId || id;
|
||||
const { data } = useQuery({
|
||||
queryKey: [
|
||||
DocumentApiAction.FetchDocumentFilter,
|
||||
@@ -221,7 +221,10 @@ export const useGetDocumentFilter = (): {
|
||||
knowledgeId,
|
||||
],
|
||||
queryFn: async () => {
|
||||
const { data } = await documentFilter(knowledgeId || id);
|
||||
if (!datasetId) {
|
||||
return;
|
||||
}
|
||||
const { data } = await documentFilter(datasetId);
|
||||
if (data.code === 0) {
|
||||
return data.data;
|
||||
}
|
||||
@@ -504,14 +507,11 @@ export const useCreateDocument = () => {
|
||||
};
|
||||
|
||||
export const useGetDocumentUrl = (documentId?: string) => {
|
||||
const auth = getSearchValue('auth');
|
||||
const getDocumentUrl = useCallback(
|
||||
(id?: string) => {
|
||||
return auth
|
||||
? `${restAPIv1}/documents/${id || documentId}`
|
||||
: `${webAPI}/document/get/${id || documentId}`;
|
||||
return `${restAPIv1}/documents/${id || documentId}/preview`;
|
||||
},
|
||||
[documentId, auth],
|
||||
[documentId],
|
||||
);
|
||||
|
||||
return getDocumentUrl;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Images } from '@/constants/common';
|
||||
import { restAPIv1, webAPI } from '@/utils/api';
|
||||
import { restAPIv1 } from '@/utils/api';
|
||||
import { useParams, useSearchParams } from 'react-router';
|
||||
// import Docx from './docx';
|
||||
// import Excel from './excel';
|
||||
@@ -29,7 +29,7 @@ const DocumentViewer = () => {
|
||||
const api =
|
||||
resource === 'files'
|
||||
? `${restAPIv1}/files/${documentId}`
|
||||
: `${webAPI}/document/get/${documentId}`;
|
||||
: `${restAPIv1}/documents/${documentId}/preview`;
|
||||
// request.head
|
||||
|
||||
if (ext === 'html' && documentId) {
|
||||
|
||||
@@ -127,9 +127,9 @@ export default {
|
||||
documentChangeParser: (datasetId: string, documentId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
||||
documentThumbnails: `${restAPIv1}/thumbnails`,
|
||||
getDocumentFile: `${webAPI}/document/get`,
|
||||
getDocumentFile: `${restAPIv1}/documents`,
|
||||
getDocumentFileDownload: (docId: string) =>
|
||||
`${webAPI}/document/download/${docId}`,
|
||||
`${restAPIv1}/documents/${docId}/download`,
|
||||
documentUpload: (datasetId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents`,
|
||||
webCrawl: (datasetId: string) =>
|
||||
|
||||
Reference in New Issue
Block a user