Feat: add_chunk supports adding an image (#13629)

### What problem does this PR solve?

`add_chunk` now supports attaching an image to a chunk.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
2026-06-29 15:31:05 +08:00 · 2026-03-16 20:15:36 +08:00
parent 09ff1bc2b0
commit af7e24ba8c
6 changed files with 95 additions and 9 deletions
--- a/api/apps/chunk_app.py
+++ b/api/apps/chunk_app.py
@@ -23,6 +23,7 @@ from quart import request

 from api.db.services.document_service import DocumentService
 from api.db.services.doc_metadata_service import DocMetadataService
+from api.utils.image_utils import store_chunk_image
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from common.metadata_utils import apply_meta_data_filter
@@ -318,6 +319,7 @@ async def create():
    d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
    if "tag_feas" in req:
        d["tag_feas"] = req["tag_feas"]
+    image_base64 = req.get("image_base64", None)

    try:
        def _log_response(resp, code, message):
@@ -365,14 +367,21 @@ async def create():
                    embd_model_config = get_tenant_default_model_by_type(tenant_id, LLMType.EMBEDDING)
            embd_mdl = LLMBundle(tenant_id, embd_model_config)

+            if image_base64:
+                d["img_id"] = "{}-{}".format(doc.kb_id, chunck_id)
+                d["doc_type_kwd"] = "image"
+
            v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
            v = 0.1 * v[0] + 0.9 * v[1]
            d["q_%d_vec" % len(v)] = v.tolist()
            settings.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)

+            if image_base64:
+                store_chunk_image(doc.kb_id, chunck_id, base64.b64decode(image_base64))
+
            DocumentService.increment_chunk_num(
                doc.id, doc.kb_id, c, 1, 0)
-            resp = get_json_result(data={"chunk_id": chunck_id})
+            resp = get_json_result(data={"chunk_id": chunck_id, "image_id": d.get("img_id", "")})
            _log_response(resp, RetCode.SUCCESS, "success")
            return resp

--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -48,6 +48,7 @@ from common.string_utils import remove_redundant_spaces
 from common.misc_utils import thread_pool_exec
 from common.constants import RetCode, LLMType, ParserType, TaskStatus, FileSource
 from common import settings
+from api.utils.image_utils import store_chunk_image

 MAXIMUM_OF_UPLOADING_FILES = 256

@@ -1190,6 +1191,9 @@ async def add_chunk(tenant_id, dataset_id, document_id):
              items:
                type: string
              description: Important keywords.
+            image_base64:
+              type: string
+              description: Base64-encoded image to associate with the chunk.
      - in: header
        name: Authorization
        type: string
@@ -1254,6 +1258,12 @@ async def add_chunk(tenant_id, dataset_id, document_id):
        d["tag_kwd"] = req["tag_kwd"]
    if "tag_feas" in req:
        d["tag_feas"] = req["tag_feas"]
+    import base64
+    image_base64 = req.get("image_base64", None)
+    if image_base64:
+        d["img_id"] = "{}-{}".format(dataset_id, chunk_id)
+        d["doc_type_kwd"] = "image"
+
    tenant_embd_id = DocumentService.get_tenant_embd_id(document_id)
    if tenant_embd_id:
        model_config = get_model_config_by_id(tenant_embd_id)
@@ -1266,6 +1276,9 @@ async def add_chunk(tenant_id, dataset_id, document_id):
    d["q_%d_vec" % len(v)] = v.tolist()
    settings.docStoreConn.insert([d], search.index_name(tenant_id), dataset_id)

+    if image_base64:
+        store_chunk_image(dataset_id, chunk_id, base64.b64decode(image_base64))
+
    DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0)
    # rename keys
    key_mapping = {
@@ -1278,6 +1291,7 @@ async def add_chunk(tenant_id, dataset_id, document_id):
        "create_timestamp_flt": "create_timestamp",
        "create_time": "create_time",
        "document_keyword": "document",
+        "img_id": "image_id",
    }
    renamed_chunk = {}
    for key, value in d.items():
--- a/api/utils/image_utils.py
+++ b/api/utils/image_utils.py
@@ -0,0 +1,40 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+from io import BytesIO
+
+from PIL import Image
+
+from common import settings
+
+
+def store_chunk_image(bucket, name, image_binary):
+    """Store a chunk image; stack it below any image already saved under the same key."""
+    storage = settings.STORAGE_IMPL
+    if not storage.obj_exist(bucket, name):
+        storage.put(bucket, name, image_binary)
+        return
+    # Decode both images as RGB so they can share one JPEG canvas.
+    top = Image.open(BytesIO(storage.get(bucket, name)))
+    bottom = Image.open(BytesIO(image_binary))
+    top, bottom = top.convert("RGB"), bottom.convert("RGB")
+    # White canvas: as wide as the wider image, as tall as both together.
+    canvas = Image.new("RGB", (max(top.width, bottom.width), top.height + bottom.height), (255, 255, 255))
+    canvas.paste(top, (0, 0))
+    canvas.paste(bottom, (0, top.height))
+    out = BytesIO()
+    canvas.save(out, format="JPEG")
+    storage.put(bucket, name, out.getvalue())
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -2005,6 +2005,7 @@ Adds a chunk to a specified document in a specified dataset.
 - Body:
  - `"content"`: `string`
  - `"important_keywords"`: `list[string]`
+  - `"image_base64"`: `string`

 ##### Request example

@@ -2015,22 +2016,25 @@ curl --request POST \
     --header 'Authorization: Bearer <YOUR_API_KEY>' \
     --data '
     {
-          "content": "<CHUNK_CONTENT_HERE>"
+          "content": "<CHUNK_CONTENT_HERE>",
+          "image_base64": "<BASE64_ENCODED_IMAGE>"
     }'
 ```

 ##### Request parameters

- `dataset_id`: (*Path parameter*)  
+- `dataset_id`: (*Path parameter*)
  The associated dataset ID.
- `document_ids`: (*Path parameter*)  
+- `document_id`: (*Path parameter*)
  The associated document ID.
- `"content"`: (*Body parameter*), `string`, *Required*  
+- `"content"`: (*Body parameter*), `string`, *Required*
  The text content of the chunk.
- `"important_keywords`(*Body parameter*), `list[string]`  
+- `"important_keywords"`: (*Body parameter*), `list[string]`
  The key terms or phrases to tag with the chunk.
 - `"questions"`(*Body parameter*), `list[string]`
  If there is a given question, the embedded chunks will be based on them
+- `"image_base64"`: (*Body parameter*), `string`
+  A base64-encoded image to associate with the chunk. If the chunk already has an image, the new image will be vertically concatenated below the existing one.

 #### Response

@@ -2047,6 +2051,7 @@ Success:
            "dataset_id": "72f36e1ebdf411efb7250242ac120006",
            "document_id": "61d68474be0111ef98dd0242ac120006",
            "id": "12ccdc56e59837e5",
+            "image_id": "",
            "important_keywords": [],
            "questions": []
        }
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -855,7 +855,7 @@ print("Async bulk parsing cancelled.")
 ### Add chunk

 ```python
-Document.add_chunk(content:str, important_keywords:list[str] = []) -> Chunk
+Document.add_chunk(content: str, important_keywords: list[str] = [], questions: list[str] = [], image_base64: str | None = None) -> Chunk
 ```

 Adds a chunk to the current document.
@@ -870,6 +870,10 @@ The text content of the chunk.

 The key terms or phrases to tag with the chunk.

+##### image_base64: `str`
+
+A base64-encoded image to associate with the chunk. If the chunk already has an image, the new image will be vertically concatenated below the existing one.
+
 #### Returns

 - Success: A `Chunk` object.
@@ -880,6 +884,7 @@ A `Chunk` object contains the following attributes:
 - `id`: `str`: The chunk ID.
 - `content`: `str` The text content of the chunk.
 - `important_keywords`: `list[str]` A list of key terms or phrases tagged with the chunk.
+- `image_id`: `str` The image ID associated with the chunk (empty string if no image).
 - `create_time`: `str` The time when the chunk was created (added to the document).
 - `create_timestamp`: `float` The timestamp representing the creation time of the chunk, expressed in seconds since January 1, 1970.
 - `dataset_id`: `str` The ID of the associated dataset.
@@ -902,6 +907,16 @@ doc = doc[0]
 chunk = doc.add_chunk(content="xxxxxxx")
 ```

+Adding a chunk with an image:
+
+```python
+import base64
+
+with open("image.jpg", "rb") as f:
+    img_b64 = base64.b64encode(f.read()).decode()
+chunk = doc.add_chunk(content="description of image", image_base64=img_b64)
+```
+
 ---

 ### List chunks
--- a/sdk/python/ragflow_sdk/modules/document.py
+++ b/sdk/python/ragflow_sdk/modules/document.py
@@ -87,8 +87,11 @@ class Document(Base):
            return chunks
        raise Exception(res.get("message"))

-    def add_chunk(self, content: str, important_keywords: list[str] = [], questions: list[str] = []):
-        res = self.post(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"content": content, "important_keywords": important_keywords, "questions": questions})
+    def add_chunk(self, content: str, important_keywords: list[str] = [], questions: list[str] = [], image_base64: str | None = None):
+        body = {"content": content, "important_keywords": important_keywords, "questions": questions}
+        if image_base64 is not None:
+            body["image_base64"] = image_base64
+        res = self.post(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", body)
        res = res.json()
        if res.get("code") == 0:
            return Chunk(self.rag, res["data"].get("chunk"))