mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Infer Content-Type for document image endpoint (#15368)
## Summary

Fixes [#15367](https://github.com/infiniflow/ragflow/issues/15367) — `GET /api/v1/documents/images/<image_id>` always returned `Content-Type: image/JPEG`, even for PNG/WebP chunk images and thumbnails whose object keys carry a file extension.

## Related Issue

Fixes #15367

## Change Type

- [x] Bug fix
- [x] Regression tests
- [ ] New feature
- [ ] Refactor

## What Changed

- Added `_detect_image_content_type_from_bytes()` — PNG/JPEG/GIF/WebP/BMP magic-byte detection
- Added `_content_type_for_document_image()` — object-key extension via `CONTENT_TYPE_MAP`, then magic bytes, else `application/octet-stream`
- **`get_document_image()`** — set inferred `Content-Type` instead of hardcoded `image/JPEG`
- Also guards missing storage blob (`Image not found.`) to avoid `make_response(None)` (same handler; complements #15365)

## Files Changed

| File | Change |
|------|--------|
| `api/apps/restful_apis/document_api.py` | MIME inference helpers + handler update |
| `test/testcases/test_web_api/test_document_app/test_document_metadata.py` | 3 unit tests |

## Validation

```bash
cd /root/gittensor/ragflow
pytest test/testcases/test_web_api/test_document_app/test_document_metadata.py::TestDocumentMetadataUnit::test_get_document_image_content_type_from_object_extension_unit -v
pytest test/testcases/test_web_api/test_document_app/test_document_metadata.py::TestDocumentMetadataUnit::test_get_document_image_content_type_from_magic_bytes_unit -v
pytest test/testcases/test_web_api/test_document_app/test_document_metadata.py::TestDocumentMetadataUnit::test_get_document_image_missing_blob_unit -v
```

## Test Plan

- [x] `.png` object key → `image/png`
- [x] Extensionless chunk key + PNG bytes → `image/png` (magic bytes)
- [x] Missing blob → 4xx `"Image not found."`
- [ ] CI green
This commit is contained in:
@@ -1641,6 +1641,32 @@ async def stop_parse_documents(tenant_id, dataset_id):
|
||||
return get_error_data_result(message="Internal server error")
|
||||
|
||||
|
||||
def _detect_image_content_type_from_bytes(data):
|
||||
if data.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return "image/png"
|
||||
if data[:3] == b"\xff\xd8\xff":
|
||||
return "image/jpeg"
|
||||
if data[:6] in (b"GIF87a", b"GIF89a"):
|
||||
return "image/gif"
|
||||
if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
|
||||
return "image/webp"
|
||||
if data[:2] == b"BM":
|
||||
return "image/bmp"
|
||||
return None
|
||||
|
||||
|
||||
def _content_type_for_document_image(object_name, data):
    """Choose the ``Content-Type`` to serve a stored document image with.

    Resolution order:
      1. The extension of ``object_name`` (case-insensitive), looked up in
         ``CONTENT_TYPE_MAP`` and accepted only if it maps to an ``image/*``
         type.
      2. The file signature found in ``data``.
      3. ``"application/octet-stream"`` when neither yields a result.

    Args:
        object_name: Storage object key of the image.
        data: Raw image bytes fetched from storage.

    Returns:
        A MIME type string; never ``None``.
    """
    # Text after the last dot; ``dot`` is empty when the key has no dot at all.
    _, dot, extension = object_name.lower().rpartition(".")
    if dot and extension:
        mapped_type = CONTENT_TYPE_MAP.get(extension)
        if mapped_type and mapped_type.startswith("image/"):
            return mapped_type

    sniffed_type = _detect_image_content_type_from_bytes(data)
    return sniffed_type if sniffed_type else "application/octet-stream"
|
||||
|
||||
|
||||
@manager.route("/documents/images/<image_id>", methods=["GET"]) # noqa: F821
|
||||
async def get_document_image(image_id):
|
||||
"""
|
||||
@@ -1670,8 +1696,11 @@ async def get_document_image(image_id):
|
||||
return get_data_error_result(message="Image not found.")
|
||||
bkt, nm = image_id.split("-")
|
||||
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
|
||||
if not data:
|
||||
return get_data_error_result(message="Image not found.")
|
||||
content_type = _content_type_for_document_image(nm, data)
|
||||
response = await make_response(data)
|
||||
response.headers.set("Content-Type", "image/JPEG")
|
||||
response.headers.set("Content-Type", content_type)
|
||||
return response
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@@ -450,6 +450,82 @@ class TestDocumentMetadataUnit:
|
||||
assert res["code"] == 500
|
||||
assert "download boom" in res["message"]
|
||||
|
||||
@pytest.mark.p2
def test_get_document_image_content_type_from_object_extension_unit(self, document_app_module, monkeypatch):
    """A ``.png`` object key must produce ``Content-Type: image/png``.

    The stored payload deliberately carries no recognizable image signature,
    so the expected header can only come from the object-key extension and
    not from the magic-byte fallback. With a real PNG payload the test would
    keep passing even if extension inference were broken.

    NOTE(review): relies on ``CONTENT_TYPE_MAP`` mapping ``png`` to
    ``image/png`` -- confirm against the map's definition.
    """
    module = document_app_module

    class _Headers(dict):
        # Minimal stand-in for the response headers object: only ``set`` is used.
        def set(self, key, value):
            self[key] = value

    class _ImageResponse:
        def __init__(self, data):
            self.data = data
            self.headers = _Headers()

    # Non-empty (so the missing-blob guard is passed) but matching none of the
    # PNG/JPEG/GIF/WebP/BMP signatures.
    opaque_bytes = b"opaque payload without an image signature"

    async def fake_thread_pool_exec(*_args, **_kwargs):
        return opaque_bytes

    async def fake_make_response(data):
        return _ImageResponse(data)

    monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec)
    monkeypatch.setattr(module, "make_response", fake_make_response)

    res = _run(module.get_document_image("kb1-object.png"))

    assert isinstance(res, _ImageResponse)
    assert res.data == opaque_bytes
    assert res.headers["Content-Type"] == "image/png"
|
||||
|
||||
@pytest.mark.p2
def test_get_document_image_content_type_from_magic_bytes_unit(self, document_app_module, monkeypatch):
    """An extensionless object key with PNG bytes is served as ``image/png``.

    The key ``a1b2c3d4e5f6`` has no file extension, so the content type has
    to be derived from the PNG signature at the start of the payload.
    """
    module = document_app_module

    # A complete 1x1 PNG; only its leading signature matters for detection.
    png_payload = b"".join(
        (
            b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01",
            b"\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89",
            b"\x00\x00\x00\nIDATx\x9cc\x00\x01\x00\x00\x05\x00\x01",
            b"\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82",
        )
    )

    class _HeaderBag(dict):
        # Mimics the ``headers.set(name, value)`` call made by the handler.
        def set(self, key, value):
            self[key] = value

    class _ImageResponse:
        def __init__(self, data):
            self.headers = _HeaderBag()
            self.data = data

    async def storage_returns_png(*_args, **_kwargs):
        return png_payload

    async def build_response(data):
        return _ImageResponse(data)

    monkeypatch.setattr(module, "make_response", build_response)
    monkeypatch.setattr(module, "thread_pool_exec", storage_returns_png)

    response = _run(module.get_document_image("kb1-a1b2c3d4e5f6"))

    assert isinstance(response, _ImageResponse)
    assert response.headers["Content-Type"] == "image/png"
|
||||
|
||||
@pytest.mark.p2
def test_get_document_image_missing_blob_unit(self, document_app_module, monkeypatch):
    """A well-formed image id whose blob is absent yields "Image not found.".

    The id must contain exactly one hyphen: the handler unpacks
    ``image_id.split("-")`` into ``(bucket, name)``, so an id with more
    hyphens can never reach the storage lookup and would not exercise the
    missing-blob guard at all. The fake storage call is recorded to prove the
    lookup actually happened before the error was returned.
    """
    module = document_app_module
    storage_calls = []

    async def fake_thread_pool_exec(*args, **_kwargs):
        storage_calls.append(args)
        # Storage reports no object for this key.
        return None

    monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec)

    res = _run(module.get_document_image("kb1-missingblob"))

    # The handler passes (storage getter, bucket, object name) to the executor.
    assert len(storage_calls) == 1
    assert storage_calls[0][1:] == ("kb1", "missingblob")
    assert res["code"] == RetCode.DATA_ERROR
    assert res["message"] == "Image not found."
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Moved to /api/v1/documents/images/<image_id>")
|
||||
def test_get_image_success_and_exception_unit(self, document_app_module, monkeypatch):
|
||||
|
||||
Reference in New Issue
Block a user