From 118f737b3a423665adc98ca1e528f367bcbb2731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A9=E6=B5=B7=E8=92=BC=E7=81=86?= Date: Thu, 5 Mar 2026 13:45:39 +0800 Subject: [PATCH] Feat:Enhance chunk management by adding support for 'available', 'tag_kwd' and 'tag_feas' (#13383) ### What problem does this PR solve? Enhance chunk management by adding support for 'available', 'tag_kwd' and 'tag_feas' fields in list, add, and update chunk functions, just like chunk_app.py. This improves data handling and flexibility in chunk processing. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- api/apps/sdk/doc.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 513a01c4c1..991e292382 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1070,6 +1070,8 @@ async def list_chunks(tenant_id, dataset_id, document_id): "question": question, "sort": True, } + if "available" in req: + query["available_int"] = 1 if req["available"] == "true" else 0 key_mapping = { "chunk_num": "chunk_count", "kb_id": "dataset_id", @@ -1116,6 +1118,8 @@ async def list_chunks(tenant_id, dataset_id, document_id): "image_id": chunk.get("img_id", ""), "available": bool(chunk.get("available_int", 1)), "positions": chunk.get("position_int", []), + "tag_kwd": chunk.get("tag_kwd", []), + "tag_feas": chunk.get("tag_feas", {}), } res["chunks"].append(final_chunk) _ = Chunk(**final_chunk) @@ -1240,6 +1244,10 @@ async def add_chunk(tenant_id, dataset_id, document_id): d["kb_id"] = dataset_id d["docnm_kwd"] = doc.name d["doc_id"] = document_id + if "tag_kwd" in req: + d["tag_kwd"] = req["tag_kwd"] + if "tag_feas" in req: + d["tag_feas"] = req["tag_feas"] embd_id = DocumentService.get_embd_id(document_id) embd_mdl = TenantLLMService.model_instance(tenant_id, LLMType.EMBEDDING.value, embd_id) v, c = embd_mdl.encode([doc.name, req["content"] if not d["question_kwd"] else "\n".join(d["question_kwd"])]) @@ -1434,6 
+1442,10 @@ async def update_chunk(tenant_id, dataset_id, document_id, chunk_id): if not isinstance(req["positions"], list): return get_error_data_result("`positions` should be a list") d["position_int"] = req["positions"] + if "tag_kwd" in req: + d["tag_kwd"] = req["tag_kwd"] + if "tag_feas" in req: + d["tag_feas"] = req["tag_feas"] embd_id = DocumentService.get_embd_id(document_id) embd_mdl = TenantLLMService.model_instance(tenant_id, LLMType.EMBEDDING.value, embd_id) if doc.parser_id == ParserType.QA: