mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-05 10:58:34 +08:00
Refactor: migrate document thumbnails API (#14344)
### What problem does this PR solve?

Before migration: `GET /v1/document/thumbnails`
After migration: `GET /api/v1/thumbnails`

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -18,7 +18,6 @@ import re
|
||||
from quart import make_response, request
|
||||
|
||||
from api.apps import current_user, login_required
|
||||
from api.constants import IMG_BASE64_PREFIX
|
||||
from api.db import FileType
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
@@ -36,25 +35,6 @@ from common.misc_utils import thread_pool_exec
|
||||
from rag.nlp import search
|
||||
|
||||
|
||||
@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
|
||||
# @login_required
|
||||
def thumbnails():
|
||||
doc_ids = request.args.getlist("doc_ids")
|
||||
if not doc_ids:
|
||||
return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)
|
||||
|
||||
try:
|
||||
docs = DocumentService.get_thumbnails(doc_ids)
|
||||
|
||||
for doc_item in docs:
|
||||
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
|
||||
doc_item["thumbnail"] = f"/v1/document/image/{doc_item['kb_id']}-{doc_item['thumbnail']}"
|
||||
|
||||
return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/get/<doc_id>", methods=["GET"]) # noqa: F821
|
||||
@login_required
|
||||
async def get(doc_id):
|
||||
@@ -147,19 +127,3 @@ async def change_parser():
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/image/<image_id>", methods=["GET"]) # noqa: F821
|
||||
# @login_required
|
||||
async def get_image(image_id):
|
||||
try:
|
||||
arr = image_id.split("-")
|
||||
if len(arr) != 2:
|
||||
return get_data_error_result(message="Image not found.")
|
||||
bkt, nm = image_id.split("-")
|
||||
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
|
||||
response = await make_response(data)
|
||||
response.headers.set("Content-Type", "image/JPEG")
|
||||
return response
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@@ -719,7 +719,7 @@ def list_docs(dataset_id, tenant_id):
|
||||
renamed_doc_list = [map_doc_keys(doc) for doc in docs]
|
||||
for doc_item in renamed_doc_list:
|
||||
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
|
||||
doc_item["thumbnail"] = f"/v1/document/image/{dataset_id}-{doc_item['thumbnail']}"
|
||||
doc_item["thumbnail"] = f"/api/v1/documents/images/{dataset_id}-{doc_item['thumbnail']}"
|
||||
if doc_item.get("source_type"):
|
||||
doc_item["source_type"] = doc_item["source_type"].split("/")[0]
|
||||
if doc_item["parser_config"].get("metadata"):
|
||||
@@ -1168,6 +1168,44 @@ async def update_metadata_config(tenant_id, dataset_id, document_id):
|
||||
return get_result(data=doc.to_dict())
|
||||
|
||||
|
||||
@manager.route("/thumbnails", methods=["GET"])  # noqa: F821
def list_thumbnails():
    """
    Get thumbnails for documents.
    ---
    tags:
      - Documents
    parameters:
      - in: query
        name: doc_ids
        type: array
        required: true
        description: List of document IDs to get thumbnails for.
    responses:
      200:
        description: Successfully retrieved thumbnails
      400:
        description: Missing document IDs
    """
    # Imported lazily inside the handler, exactly as the route was written.
    from api.constants import IMG_BASE64_PREFIX
    from api.db.services.document_service import DocumentService

    requested_ids = request.args.getlist("doc_ids")
    if not requested_ids:
        return get_json_result(data=False, message='Lack of "Document ID"', code=RetCode.ARGUMENT_ERROR)

    try:
        records = DocumentService.get_thumbnails(requested_ids)

        for record in records:
            stored = record["thumbnail"]
            # Values already carrying the inline base64 prefix are returned
            # untouched; any other non-empty value is rewritten into a URL
            # served by the document-image route.
            if stored and not stored.startswith(IMG_BASE64_PREFIX):
                record["thumbnail"] = f"/api/v1/documents/images/{record['kb_id']}-{stored}"

        # Response payload maps each document id to its (possibly rewritten) thumbnail.
        thumbnails_by_id = {record["id"]: record["thumbnail"] for record in records}
        return get_json_result(data=thumbnails_by_id)
    except Exception as e:
        return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/metadatas", methods=["PATCH"]) # noqa: F821
|
||||
@login_required
|
||||
@add_tenant_id_to_kwargs
|
||||
@@ -1581,6 +1619,42 @@ async def stop_parse_documents(tenant_id, dataset_id):
|
||||
return get_error_data_result(message="Internal server error")
|
||||
|
||||
|
||||
@manager.route("/documents/images/<image_id>", methods=["GET"])  # noqa: F821
async def get_document_image(image_id):
    """
    Get a document image by ID.
    ---
    tags:
      - Documents
    parameters:
      - name: image_id
        in: path
        required: true
        schema:
          type: string
        description: "The image ID, formatted as <bucket>-<name> with exactly one hyphen."
    responses:
      200:
        description: Image file
        content:
          image/jpeg:
            schema:
              type: string
              format: binary
    """
    # NOTE: the description above is quoted on purpose. An unquoted YAML plain
    # scalar may not contain ": ", so the earlier "(format: ...)" wording made
    # the spec block invalid YAML.
    try:
        # The id must split into exactly two parts on "-": the storage bucket
        # and the object name. Anything else is rejected as not found.
        parts = image_id.split("-")
        if len(parts) != 2:
            return get_data_error_result(message="Image not found.")
        bucket, name = parts  # reuse the validated split instead of splitting again

        # The storage call is handed to thread_pool_exec and awaited here.
        data = await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, name)
        response = await make_response(data)
        response.headers.set("Content-Type", "image/JPEG")
        return response
    except Exception as e:
        return server_error_response(e)
|
||||
|
||||
|
||||
ARTIFACT_CONTENT_TYPES = {
|
||||
".png": "image/png",
|
||||
".jpg": "image/jpeg",
|
||||
|
||||
@@ -451,6 +451,17 @@ def document_change_status(auth, dataset_id, payload=None, *, headers=HEADERS, d
|
||||
return res.json()
|
||||
|
||||
|
||||
def document_thumbnails(auth, params=None, *, headers=HEADERS, data=None):
    """Fetch document thumbnails through ``GET /api/v1/thumbnails``.

    Args:
        auth: Authentication object passed to ``requests``.
        params: Query parameters (e.g., {"doc_ids": ["doc1", "doc2"]}).
        headers: HTTP headers sent with the request; defaults to ``HEADERS``.
        data: Optional request body forwarded unchanged to ``requests.get``.

    Returns:
        The response body decoded from JSON.
    """
    endpoint = f"{HOST_ADDRESS}/api/v1/thumbnails"
    response = requests.get(url=endpoint, params=params, headers=headers, auth=auth, data=data)
    return response.json()
|
||||
|
||||
|
||||
def bulk_upload_documents(auth, kb_id, num, tmp_path):
|
||||
fps = []
|
||||
for i in range(num):
|
||||
|
||||
@@ -288,37 +288,6 @@ class TestDocumentMetadataUnit:
|
||||
assert "Each delete requires key" in res["message"], res
|
||||
|
||||
|
||||
def test_thumbnails_missing_ids_rewrite_and_exception_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
monkeypatch.setattr(module, "request", _DummyRequest(args={}))
|
||||
res = module.thumbnails()
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert 'Lack of "Document ID"' in res["message"]
|
||||
|
||||
monkeypatch.setattr(module, "request", _DummyRequest(args={"doc_ids": ["doc1", "doc2"]}))
|
||||
monkeypatch.setattr(
|
||||
module.DocumentService,
|
||||
"get_thumbnails",
|
||||
lambda _doc_ids: [
|
||||
{"id": "doc1", "kb_id": "kb1", "thumbnail": "thumb.jpg"},
|
||||
{"id": "doc2", "kb_id": "kb1", "thumbnail": f"{module.IMG_BASE64_PREFIX}blob"},
|
||||
],
|
||||
)
|
||||
res = module.thumbnails()
|
||||
assert res["code"] == 0
|
||||
assert res["data"]["doc1"] == "/v1/document/image/kb1-thumb.jpg"
|
||||
assert res["data"]["doc2"] == f"{module.IMG_BASE64_PREFIX}blob"
|
||||
|
||||
def raise_error(*_args, **_kwargs):
|
||||
raise RuntimeError("thumb boom")
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "get_thumbnails", raise_error)
|
||||
monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)})
|
||||
res = module.thumbnails()
|
||||
assert res["code"] == 500
|
||||
assert "thumb boom" in res["message"]
|
||||
|
||||
|
||||
def test_get_route_not_found_success_and_exception_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None))
|
||||
@@ -546,6 +515,7 @@ class TestDocumentMetadataUnit:
|
||||
assert res["code"] == 500
|
||||
assert "parser boom" in res["message"]
|
||||
|
||||
@pytest.mark.skip(reason="Moved to /api/v1/documents/images/<image_id>")
|
||||
def test_get_image_success_and_exception_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
|
||||
|
||||
@@ -123,7 +123,7 @@ export default {
|
||||
documentCreate: (datasetId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents?type=empty`,
|
||||
documentChangeParser: `${webAPI}/document/change_parser`,
|
||||
documentThumbnails: `${webAPI}/document/thumbnails`,
|
||||
documentThumbnails: `${restAPIv1}/thumbnails`,
|
||||
getDocumentFile: `${webAPI}/document/get`,
|
||||
getDocumentFileDownload: (docId: string) =>
|
||||
`${webAPI}/document/download/${docId}`,
|
||||
|
||||
Reference in New Issue
Block a user