Infer Content-Type for document image endpoint (#15368)

## Summary

Fixes [#15367](https://github.com/infiniflow/ragflow/issues/15367) —
`GET /api/v1/documents/images/<image_id>` always returned `Content-Type:
image/JPEG` even for PNG/WebP chunk images and extensioned thumbnails.

## Related Issue

Fixes #15367

## Change Type

- [x] Bug fix
- [x] Regression tests
- [ ] New feature
- [ ] Refactor

## What Changed

- Added `_detect_image_content_type_from_bytes()` —
PNG/JPEG/GIF/WebP/BMP magic-byte detection
- Added `_content_type_for_document_image()` — object-key extension via
`CONTENT_TYPE_MAP`, then magic bytes, else `application/octet-stream`
- **`get_document_image()`** — set inferred `Content-Type` instead of
hardcoded `image/JPEG`
- Also guards missing storage blob (`Image not found.`) to avoid
`make_response(None)` (same handler; complements #15365)

## Files Changed

| File | Change |
|------|--------|
| `api/apps/restful_apis/document_api.py` | MIME inference helpers + handler update |
| `test/testcases/test_web_api/test_document_app/test_document_metadata.py` | 3 unit tests |

## Validation

```bash
cd /root/gittensor/ragflow
pytest test/testcases/test_web_api/test_document_app/test_document_metadata.py::TestDocumentMetadataUnit::test_get_document_image_content_type_from_object_extension_unit -v
pytest test/testcases/test_web_api/test_document_app/test_document_metadata.py::TestDocumentMetadataUnit::test_get_document_image_content_type_from_magic_bytes_unit -v
pytest test/testcases/test_web_api/test_document_app/test_document_metadata.py::TestDocumentMetadataUnit::test_get_document_image_missing_blob_unit -v
```

## Test Plan

- [x] `.png` object key → `image/png`
- [x] Extensionless chunk key + PNG bytes → `image/png` (magic bytes)
- [x] Missing blob → `RetCode.DATA_ERROR` with message `"Image not found."`
- [ ] CI green
This commit is contained in:
kpdev
2026-06-01 04:08:32 -07:00
committed by GitHub
parent b35266e9a5
commit 252cc19f93
2 changed files with 106 additions and 1 deletions

View File

@@ -1641,6 +1641,32 @@ async def stop_parse_documents(tenant_id, dataset_id):
return get_error_data_result(message="Internal server error")
def _detect_image_content_type_from_bytes(data):
if data.startswith(b"\x89PNG\r\n\x1a\n"):
return "image/png"
if data[:3] == b"\xff\xd8\xff":
return "image/jpeg"
if data[:6] in (b"GIF87a", b"GIF89a"):
return "image/gif"
if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
return "image/webp"
if data[:2] == b"BM":
return "image/bmp"
return None
def _content_type_for_document_image(object_name, data):
    """Pick the ``Content-Type`` header value for a stored document image.

    Resolution order:
      1. The extension of ``object_name`` (text after the last dot,
         lower-cased), looked up in ``CONTENT_TYPE_MAP``; only ``image/*``
         entries are accepted.
      2. Magic-byte sniffing of ``data``.
      3. ``application/octet-stream`` as the final fallback.
    """
    _, dot, extension = object_name.lower().rpartition(".")
    # A usable extension needs a dot with at least one character after it.
    if dot and extension:
        mapped = CONTENT_TYPE_MAP.get(extension)
        if mapped and mapped.startswith("image/"):
            return mapped

    return _detect_image_content_type_from_bytes(data) or "application/octet-stream"
@manager.route("/documents/images/<image_id>", methods=["GET"]) # noqa: F821
async def get_document_image(image_id):
"""
@@ -1670,8 +1696,11 @@ async def get_document_image(image_id):
return get_data_error_result(message="Image not found.")
bkt, nm = image_id.split("-")
data = await thread_pool_exec(settings.STORAGE_IMPL.get, bkt, nm)
if not data:
return get_data_error_result(message="Image not found.")
content_type = _content_type_for_document_image(nm, data)
response = await make_response(data)
response.headers.set("Content-Type", "image/JPEG")
response.headers.set("Content-Type", content_type)
return response
except Exception as e:
return server_error_response(e)

View File

@@ -450,6 +450,82 @@ class TestDocumentMetadataUnit:
assert res["code"] == 500
assert "download boom" in res["message"]
@pytest.mark.p2
def test_get_document_image_content_type_from_object_extension_unit(self, document_app_module, monkeypatch):
    """An object key ending in ``.png`` is served as ``image/png``."""

    class _HeaderBag(dict):
        # Provides the ``headers.set(key, value)`` call the handler makes.
        def set(self, key, value):
            self[key] = value

    class _FakeResponse:
        def __init__(self, data):
            self.data = data
            self.headers = _HeaderBag()

    png_payload = (
        b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01"
        b"\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89"
        b"\x00\x00\x00\nIDATx\x9cc\x00\x01\x00\x00\x05\x00\x01"
        b"\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82"
    )

    async def _stub_storage_fetch(*_args, **_kwargs):
        return png_payload

    async def _stub_make_response(data):
        return _FakeResponse(data)

    # Replace the storage lookup and the response factory on the module under test.
    monkeypatch.setattr(document_app_module, "thread_pool_exec", _stub_storage_fetch)
    monkeypatch.setattr(document_app_module, "make_response", _stub_make_response)

    response = _run(document_app_module.get_document_image("kb1-object.png"))

    assert isinstance(response, _FakeResponse)
    assert response.headers["Content-Type"] == "image/png"
@pytest.mark.p2
def test_get_document_image_content_type_from_magic_bytes_unit(self, document_app_module, monkeypatch):
    """An extensionless object key with a PNG payload is served as ``image/png``."""
    app = document_app_module

    class _RecordingHeaders(dict):
        # The handler writes headers through ``set``; store them as dict items.
        def set(self, key, value):
            self[key] = value

    class _StubResponse:
        def __init__(self, data):
            self.data = data
            self.headers = _RecordingHeaders()

    # Payload starts with the PNG signature; the key below carries no extension.
    stored_blob = (
        b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01"
        b"\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89"
        b"\x00\x00\x00\nIDATx\x9cc\x00\x01\x00\x00\x05\x00\x01"
        b"\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82"
    )

    async def _return_blob(*_args, **_kwargs):
        return stored_blob

    async def _wrap_in_response(data):
        return _StubResponse(data)

    for attr_name, replacement in (
        ("thread_pool_exec", _return_blob),
        ("make_response", _wrap_in_response),
    ):
        monkeypatch.setattr(app, attr_name, replacement)

    outcome = _run(app.get_document_image("kb1-a1b2c3d4e5f6"))

    assert isinstance(outcome, _StubResponse)
    assert outcome.headers["Content-Type"] == "image/png"
@pytest.mark.p2
def test_get_document_image_missing_blob_unit(self, document_app_module, monkeypatch):
    """A storage lookup that yields no data returns the "Image not found." error.

    The image id must contain exactly one hyphen: the handler unpacks
    ``image_id.split("-")`` into two names (bucket and object key). An id
    with more hyphens never reaches the storage lookup, so it would not
    exercise the missing-blob guard this test is meant to cover.
    """
    module = document_app_module
    storage_calls = []

    async def fake_thread_pool_exec(*args, **_kwargs):
        # Record the call so the test can prove the lookup path was taken.
        storage_calls.append(args)
        return None

    monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec)

    res = _run(module.get_document_image("kb1-missingobject"))

    assert storage_calls, "storage lookup was never reached"
    assert res["code"] == RetCode.DATA_ERROR
    assert res["message"] == "Image not found."
@pytest.mark.skip(reason="Moved to /api/v1/documents/images/<image_id>")
def test_get_image_success_and_exception_unit(self, document_app_module, monkeypatch):