Feat: add_chunk supports add image (#13629)

### What problem does this PR solve?

`add_chunk` now supports attaching an image to a chunk via an optional base64-encoded `image_base64` field.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
This commit is contained in:
Yongteng Lei
2026-03-16 20:15:36 +08:00
committed by GitHub
parent 09ff1bc2b0
commit af7e24ba8c
6 changed files with 95 additions and 9 deletions

View File

@@ -23,6 +23,7 @@ from quart import request
from api.db.services.document_service import DocumentService
from api.db.services.doc_metadata_service import DocMetadataService
from api.utils.image_utils import store_chunk_image
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from common.metadata_utils import apply_meta_data_filter
@@ -318,6 +319,7 @@ async def create():
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
if "tag_feas" in req:
d["tag_feas"] = req["tag_feas"]
image_base64 = req.get("image_base64", None)
try:
def _log_response(resp, code, message):
@@ -365,14 +367,21 @@ async def create():
embd_model_config = get_tenant_default_model_by_type(tenant_id, LLMType.EMBEDDING)
embd_mdl = LLMBundle(tenant_id, embd_model_config)
if image_base64:
d["img_id"] = "{}-{}".format(doc.kb_id, chunck_id)
d["doc_type_kwd"] = "image"
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
v = 0.1 * v[0] + 0.9 * v[1]
d["q_%d_vec" % len(v)] = v.tolist()
settings.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)
if image_base64:
store_chunk_image(doc.kb_id, chunck_id, base64.b64decode(image_base64))
DocumentService.increment_chunk_num(
doc.id, doc.kb_id, c, 1, 0)
resp = get_json_result(data={"chunk_id": chunck_id})
resp = get_json_result(data={"chunk_id": chunck_id, "image_id": d.get("img_id", "")})
_log_response(resp, RetCode.SUCCESS, "success")
return resp

View File

@@ -48,6 +48,7 @@ from common.string_utils import remove_redundant_spaces
from common.misc_utils import thread_pool_exec
from common.constants import RetCode, LLMType, ParserType, TaskStatus, FileSource
from common import settings
from api.utils.image_utils import store_chunk_image
MAXIMUM_OF_UPLOADING_FILES = 256
@@ -1190,6 +1191,9 @@ async def add_chunk(tenant_id, dataset_id, document_id):
items:
type: string
description: Important keywords.
image_base64:
type: string
description: Base64-encoded image to associate with the chunk.
- in: header
name: Authorization
type: string
@@ -1254,6 +1258,12 @@ async def add_chunk(tenant_id, dataset_id, document_id):
d["tag_kwd"] = req["tag_kwd"]
if "tag_feas" in req:
d["tag_feas"] = req["tag_feas"]
import base64
image_base64 = req.get("image_base64", None)
if image_base64:
d["img_id"] = "{}-{}".format(dataset_id, chunk_id)
d["doc_type_kwd"] = "image"
tenant_embd_id = DocumentService.get_tenant_embd_id(document_id)
if tenant_embd_id:
model_config = get_model_config_by_id(tenant_embd_id)
@@ -1266,6 +1276,9 @@ async def add_chunk(tenant_id, dataset_id, document_id):
d["q_%d_vec" % len(v)] = v.tolist()
settings.docStoreConn.insert([d], search.index_name(tenant_id), dataset_id)
if image_base64:
store_chunk_image(dataset_id, chunk_id, base64.b64decode(image_base64))
DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0)
# rename keys
key_mapping = {
@@ -1278,6 +1291,7 @@ async def add_chunk(tenant_id, dataset_id, document_id):
"create_timestamp_flt": "create_timestamp",
"create_time": "create_time",
"document_keyword": "document",
"img_id": "image_id",
}
renamed_chunk = {}
for key, value in d.items():

40
api/utils/image_utils.py Normal file
View File

@@ -0,0 +1,40 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from io import BytesIO
from PIL import Image
from common import settings
def store_chunk_image(bucket, name, image_binary):
    """Persist a chunk image, stacking it under any image already stored.

    When no object exists at ``bucket``/``name`` the bytes in
    ``image_binary`` are written unchanged. Otherwise the stored image and
    the new one are both converted to RGB, the new image is pasted directly
    below the stored one on a white canvas, and the result is re-encoded as
    JPEG and written back under the same key.

    Args:
        bucket: Storage bucket the image lives in.
        name: Object name of the image inside ``bucket``.
        image_binary: Encoded bytes of the image to store or append.
    """
    storage = settings.STORAGE_IMPL

    # Nothing stored under this key yet: write the raw bytes untouched.
    if not storage.obj_exist(bucket, name):
        storage.put(bucket, name, image_binary)
        return

    existing = Image.open(BytesIO(storage.get(bucket, name)))
    incoming = Image.open(BytesIO(image_binary))
    existing = existing.convert("RGB")
    incoming = incoming.convert("RGB")

    # The canvas is as wide as the wider image and as tall as both together;
    # any area not covered by the narrower image stays white.
    canvas_size = (
        max(existing.width, incoming.width),
        existing.height + incoming.height,
    )
    canvas = Image.new("RGB", canvas_size, (255, 255, 255))
    for picture, top in ((existing, 0), (incoming, existing.height)):
        canvas.paste(picture, (0, top))

    encoded = BytesIO()
    canvas.save(encoded, format="JPEG")
    storage.put(bucket, name, encoded.getvalue())

View File

@@ -2005,6 +2005,7 @@ Adds a chunk to a specified document in a specified dataset.
- Body:
- `"content"`: `string`
- `"important_keywords"`: `list[string]`
- `"image_base64"`: `string`
##### Request example
@@ -2015,22 +2016,25 @@ curl --request POST \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '
{
"content": "<CHUNK_CONTENT_HERE>"
"content": "<CHUNK_CONTENT_HERE>",
"image_base64": "<BASE64_ENCODED_IMAGE>"
}'
```
##### Request parameters
- `dataset_id`: (*Path parameter*)
- `dataset_id`: (*Path parameter*)
The associated dataset ID.
- `document_ids`: (*Path parameter*)
- `document_id`: (*Path parameter*)
The associated document ID.
- `"content"`: (*Body parameter*), `string`, *Required*
- `"content"`: (*Body parameter*), `string`, *Required*
The text content of the chunk.
- `"important_keywords`(*Body parameter*), `list[string]`
- `"important_keywords"`: (*Body parameter*), `list[string]`
The key terms or phrases to tag with the chunk.
- `"questions"`: (*Body parameter*), `list[string]`
If questions are provided, the chunk is embedded based on them rather than on its content.
- `"image_base64"`: (*Body parameter*), `string`
A base64-encoded image to associate with the chunk. If the chunk already has an image, the new image will be vertically concatenated below the existing one.
#### Response
@@ -2047,6 +2051,7 @@ Success:
"dataset_id": "72f36e1ebdf411efb7250242ac120006",
"document_id": "61d68474be0111ef98dd0242ac120006",
"id": "12ccdc56e59837e5",
"image_id": "",
"important_keywords": [],
"questions": []
}

View File

@@ -855,7 +855,7 @@ print("Async bulk parsing cancelled.")
### Add chunk
```python
Document.add_chunk(content:str, important_keywords:list[str] = []) -> Chunk
Document.add_chunk(content: str, important_keywords: list[str] = [], questions: list[str] = [], image_base64: str | None = None) -> Chunk
```
Adds a chunk to the current document.
@@ -870,6 +870,10 @@ The text content of the chunk.
The key terms or phrases to tag with the chunk.
##### image_base64: `str`
A base64-encoded image to associate with the chunk. If the chunk already has an image, the new image will be vertically concatenated below the existing one.
#### Returns
- Success: A `Chunk` object.
@@ -880,6 +884,7 @@ A `Chunk` object contains the following attributes:
- `id`: `str`: The chunk ID.
- `content`: `str` The text content of the chunk.
- `important_keywords`: `list[str]` A list of key terms or phrases tagged with the chunk.
- `image_id`: `str` The image ID associated with the chunk (empty string if no image).
- `create_time`: `str` The time when the chunk was created (added to the document).
- `create_timestamp`: `float` The timestamp representing the creation time of the chunk, expressed in seconds since January 1, 1970.
- `dataset_id`: `str` The ID of the associated dataset.
@@ -902,6 +907,16 @@ doc = doc[0]
chunk = doc.add_chunk(content="xxxxxxx")
```
Adding a chunk with an image:
```python
import base64
with open("image.jpg", "rb") as f:
img_b64 = base64.b64encode(f.read()).decode()
chunk = doc.add_chunk(content="description of image", image_base64=img_b64)
```
---
### List chunks

View File

@@ -87,8 +87,11 @@ class Document(Base):
return chunks
raise Exception(res.get("message"))
def add_chunk(self, content: str, important_keywords: list[str] = [], questions: list[str] = []):
res = self.post(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"content": content, "important_keywords": important_keywords, "questions": questions})
def add_chunk(self, content: str, important_keywords: list[str] = [], questions: list[str] = [], image_base64: str | None = None):
body = {"content": content, "important_keywords": important_keywords, "questions": questions}
if image_base64 is not None:
body["image_base64"] = image_base64
res = self.post(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", body)
res = res.json()
if res.get("code") == 0:
return Chunk(self.rag, res["data"].get("chunk"))