mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
### What problem does this PR solve? Fixes #15115. `GET /api/v1/documents/images/<image_id>` returned **Image not found** when the thumbnail storage object key contained hyphens (e.g. `page-1.png`). Document APIs build URLs as `{dataset_id}-{thumbnail}`, but `get_document_image()` used `image_id.split("-")` and required exactly two segments, so keys like `<kb_id>-page-1.png` were rejected even though the blob existed. This PR splits only on the first hyphen (`split("-", 1)`) and sets `Content-Type` from the object key extension via `CONTENT_TYPE_MAP` instead of hardcoding `image/JPEG`.
This commit is contained in:
@@ -1641,6 +1641,25 @@ async def stop_parse_documents(tenant_id, dataset_id):
|
||||
return get_error_data_result(message="Internal server error")
|
||||
|
||||
|
||||
def _parse_document_image_id(image_id: str) -> tuple[str, str] | None:
|
||||
"""Split a composite document image ID into storage bucket and object key.
|
||||
|
||||
Thumbnail URLs use ``{dataset_id}-{thumbnail}``. Only the first hyphen
|
||||
separates the dataset/kb id (bucket) from the object key, which may
|
||||
contain additional hyphens (e.g. ``page-1.png``).
|
||||
|
||||
Args:
|
||||
image_id: Path segment from ``GET /documents/images/<image_id>``.
|
||||
|
||||
Returns:
|
||||
``(bucket, object_key)`` when valid, otherwise ``None``.
|
||||
"""
|
||||
parts = image_id.split("-", 1)
|
||||
if len(parts) != 2 or not parts[0] or not parts[1]:
|
||||
return None
|
||||
return parts[0], parts[1]
|
||||
|
||||
|
||||
def _detect_image_content_type_from_bytes(data):
|
||||
if data.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return "image/png"
|
||||
@@ -1680,7 +1699,7 @@ async def get_document_image(image_id):
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: The image ID (format: bucket-name-image-name)
|
||||
description: Composite ID ``{dataset_id}-{thumbnail_object_key}`` (split on first hyphen only)
|
||||
responses:
|
||||
200:
|
||||
description: Image file
|
||||
@@ -1691,10 +1710,10 @@ async def get_document_image(image_id):
|
||||
format: binary
|
||||
"""
|
||||
try:
|
||||
arr = image_id.split("-")
|
||||
if len(arr) != 2:
|
||||
parsed = _parse_document_image_id(image_id)
|
||||
if not parsed:
|
||||
return get_data_error_result(message="Image not found.")
|
||||
bkt, nm = image_id.split("-")
|
||||
bkt, nm = parsed
|
||||
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
|
||||
if not data:
|
||||
return get_data_error_result(message="Image not found.")
|
||||
|
||||
@@ -563,6 +563,50 @@ class TestDocumentMetadataUnit:
|
||||
assert res["code"] == 500
|
||||
assert "image boom" in res["message"]
|
||||
|
||||
def test_get_document_image_hyphenated_object_key(self, document_app_module, monkeypatch):
    """Hyphenated thumbnail keys are split on the first hyphen; malformed IDs are rejected.

    Positive case: ``<kb_id>-page-1.png`` must reach storage as bucket
    ``<kb_id>`` and object key ``page-1.png`` and come back as an image
    response with a PNG content type.

    Negative cases: the IDs used here have no hyphen, an empty bucket, or an
    empty object key. An ID such as ``only-one-part`` is deliberately NOT
    used: with first-hyphen splitting it parses as bucket ``only`` / key
    ``one-part``, and the fake storage below returns bytes for every key, so
    it cannot exercise the rejection path.
    """
    module = document_app_module

    class _Headers(dict):
        # Dict that also offers ``set(key, value)``; presumably the handler
        # writes the Content-Type header through that method.
        def set(self, key, value):
            self[key] = value

    class _ImageResponse:
        # Minimal stand-in for the object returned by ``make_response``.
        def __init__(self, data):
            self.data = data
            self.headers = _Headers()

    # Every (bucket, object_key) pair the handler asks storage for.
    storage_calls = []

    def _storage_get(bkt, nm):
        storage_calls.append((bkt, nm))
        return b"png-bytes"

    async def fake_thread_pool_exec(fn, *args, **kwargs):
        # Run the callable inline so the storage call is observable synchronously.
        return fn(*args, **kwargs)

    async def fake_make_response(data):
        return _ImageResponse(data)

    monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec)
    monkeypatch.setattr(module, "make_response", fake_make_response)
    monkeypatch.setattr(
        module.settings,
        "STORAGE_IMPL",
        SimpleNamespace(get=_storage_get),
    )

    kb_id = "kb12345678901234567890123456789012"
    image_id = f"{kb_id}-page-1.png"
    res = _run(module.get_document_image(image_id))
    assert isinstance(res, _ImageResponse)
    assert storage_calls == [(kb_id, "page-1.png")]
    assert res.headers["Content-Type"] == "image/png"

    # No hyphen, empty bucket, empty object key: all must be rejected.
    malformed_ids = ("nohyphen", "-page-1.png", f"{kb_id}-")
    for bad_id in malformed_ids:
        res = _run(module.get_document_image(bad_id))
        assert res["code"] == RetCode.DATA_ERROR
        assert "Image not found" in res["message"]

    # Rejection happens while parsing the ID, so storage saw only the valid request.
    assert storage_calls == [(kb_id, "page-1.png")]
|
||||
|
||||
|
||||
class TestDocumentBatchChangeStatus:
|
||||
@pytest.mark.p2
|
||||
def test_change_status_partial_failure_matrix(self, WebApiAuth, add_dataset, ragflow_tmp_dir):
|
||||
|
||||
Reference in New Issue
Block a user