From 62f94cd59b505b56df724faba4d4a2a8ce79679a Mon Sep 17 00:00:00 2001
From: Kevin Hu <kevinhu.sh@gmail.com>
Date: Thu, 2 Jul 2026 23:22:07 +0800
Subject: [PATCH] Feat: Add knowledge compilation workflows (#16515)

## Summary
- Add knowledge compilation template APIs, services, and builtin
template seed data
- Add advanced knowledge compile structure/artifact/RAPTOR workflow
support
- Update parsing, dataset/document APIs, and supporting services for
compilation workflows
---
 api/apps/restful_apis/chunk_api.py            |  353 +-
 .../restful_apis/compilation_template_api.py  |   54 +
 .../compilation_template_group_api.py         |  172 +
 api/apps/restful_apis/dataset_api.py          |  259 +-
 api/apps/restful_apis/document_api.py         |   67 +-
 api/apps/restful_apis/file_commit_api.py      |  245 +-
 .../utils/compilation_template_validation.py  |   79 +
 api/apps/services/dataset_api_service.py      | 1133 +++++-
 api/apps/services/document_api_service.py     |   14 +-
 api/db/__init__.py                            |   53 +-
 api/db/db_models.py                           |  195 +-
 api/db/init_data.py                           |   24 +-
 .../compilation_templates/artifacts.yaml      |  136 +
 .../compilation_templates/empty.yaml          |   17 +
 .../knowledge_graph.yaml                      |   74 +
 .../compilation_templates/mind_map.yaml       |   69 +
 .../compilation_templates/page_index.yaml     |   33 +
 .../compilation_templates/timeline.yaml       |   48 +
 .../init_data/compilation_templates/tree.yaml |   31 +
 .../compilation_template_group_service.py     |  394 ++
 .../services/compilation_template_service.py  |  231 ++
 api/db/services/dialog_service.py             |    3 +-
 api/db/services/document_service.py           |  111 +-
 api/db/services/file_commit_service.py        |  523 ++-
 api/db/services/knowledgebase_service.py      |  110 +-
 .../pipeline_operation_log_service.py         |   16 +-
 api/db/services/task_service.py               |  188 +-
 api/utils/validation_utils.py                 |   22 +-
 common/constants.py                           |    4 +-
 conf/infinity_mapping.json                    |   34 +-
 deepdoc/parser/pdf_parser.py                  |   12 +-
 rag/advanced_rag/knowlege_compile/__init__.py |   43 +
 rag/advanced_rag/knowlege_compile/_common.py  |  913 +++++
 .../knowlege_compile/dataset_nav.py           |  437 ++
 .../knowlege_compile}/mind_map_extractor.py   |   47 +-
 .../knowlege_compile}/raptor.py               |  335 +-
 .../knowlege_compile/structure.py             | 1637 ++++++++
 rag/advanced_rag/knowlege_compile/wiki.py     | 3576 +++++++++++++++++
 rag/flow/extractor/extractor.py               |   57 +-
 rag/nlp/search.py                             |  229 +-
 rag/svr/task_executor.py                      |   25 +-
 .../chunk_post_processor.py                   |  901 ++++-
 .../dataset_skill_generator.py                |  588 +++
 .../dataset_wiki_generator.py                 |  805 ++++
 .../task_executor_refactor/raptor_service.py  |  537 ++-
 .../task_executor_refactor/raptor_utils.py    |   27 +-
 .../task_executor_refactor/task_handler.py    |  517 ++-
 rag/utils/es_conn.py                          |  138 +-
 rag/utils/opensearch_conn.py                  |  187 +-
 rag/utils/redis_conn.py                       |    8 +
 .../test_file_commit_routes_unit.py           |   12 +-
 .../rag/graphrag/test_checkpoint_resume.py    |    2 +-
 .../test_chunk_post_processor.py              |  438 --
 .../test_chunk_service.py                     |  318 --
 .../test_task_handler.py                      |  316 --
 .../test_task_handler_integration.py          |  856 ----
 .../rag/test_raptor_psi_tree_builder.py       |   28 +-
 57 files changed, 14587 insertions(+), 3094 deletions(-)
 create mode 100644 api/apps/restful_apis/compilation_template_api.py
 create mode 100644 api/apps/restful_apis/compilation_template_group_api.py
 create mode 100644 api/apps/restful_apis/utils/compilation_template_validation.py
 create mode 100644 api/db/init_data/compilation_templates/artifacts.yaml
 create mode 100644 api/db/init_data/compilation_templates/empty.yaml
 create mode 100644 api/db/init_data/compilation_templates/knowledge_graph.yaml
 create mode 100644 api/db/init_data/compilation_templates/mind_map.yaml
 create mode 100644 api/db/init_data/compilation_templates/page_index.yaml
 create mode 100644 api/db/init_data/compilation_templates/timeline.yaml
 create mode 100644 api/db/init_data/compilation_templates/tree.yaml
 create mode 100644 api/db/services/compilation_template_group_service.py
 create mode 100644 api/db/services/compilation_template_service.py
 create mode 100644 rag/advanced_rag/knowlege_compile/__init__.py
 create mode 100644 rag/advanced_rag/knowlege_compile/_common.py
 create mode 100644 rag/advanced_rag/knowlege_compile/dataset_nav.py
 rename rag/{graphrag/general => advanced_rag/knowlege_compile}/mind_map_extractor.py (79%)
 rename rag/{ => advanced_rag/knowlege_compile}/raptor.py (68%)
 create mode 100644 rag/advanced_rag/knowlege_compile/structure.py
 create mode 100644 rag/advanced_rag/knowlege_compile/wiki.py
 create mode 100644 rag/svr/task_executor_refactor/dataset_skill_generator.py
 create mode 100644 rag/svr/task_executor_refactor/dataset_wiki_generator.py
 delete mode 100644 test/unit_test/rag/svr/task_executor_refactor/test_chunk_post_processor.py
 delete mode 100644 test/unit_test/rag/svr/task_executor_refactor/test_chunk_service.py
 delete mode 100644 test/unit_test/rag/svr/task_executor_refactor/test_task_handler.py
 delete mode 100644 test/unit_test/rag/svr/task_executor_refactor/test_task_handler_integration.py

diff --git a/api/apps/restful_apis/chunk_api.py b/api/apps/restful_apis/chunk_api.py
index 8f16ea28f1..b91519db0c 100644
--- a/api/apps/restful_apis/chunk_api.py
+++ b/api/apps/restful_apis/chunk_api.py
@@ -16,6 +16,7 @@
 import base64
 import binascii
 import datetime
+import json
 import logging
 import re
 
@@ -54,6 +55,7 @@ from api.utils.reference_metadata_utils import (
 )
 from common import settings
 from common.constants import LLMType, ParserType, RetCode, TaskStatus
+from common.doc_store.doc_store_base import OrderByExpr
 from common.metadata_utils import convert_conditions, meta_filter
 from common.misc_utils import thread_pool_exec
 from common.string_utils import is_content_empty, remove_redundant_spaces
@@ -135,6 +137,19 @@ def _map_doc(doc):
     return renamed_doc
 
 
+def _get_query_id_list(args, name: str) -> list[str]:  # Collect ids from repeated and/or comma-separated `name` values.
+    values = args.getlist(name) if hasattr(args, "getlist") else [args.get(name)]  # objects without getlist() contribute one value
+    ids: list[str] = []  # result, kept in first-seen order
+    seen: set[str] = set()  # membership index used to drop duplicates
+    for value in values:
+        for item in str(value or "").split(","):  # None/"" becomes "" and yields no ids
+            item = item.strip()
+            if item and item not in seen:  # skip blanks and repeats
+                ids.append(item)
+                seen.add(item)
+    return ids
+
+
 def _strip_chunk_runtime_fields(chunk):
     for name in [name for name in chunk.keys() if re.search(r"(_vec$|_sm_|_tks|_ltks)", name)]:
         del chunk[name]
@@ -148,6 +163,15 @@ def _get_dataset_tenant_id(dataset_id):
     return kb.tenant_id
 
 
+def _compilation_template_kind(kind) -> str:  # Normalize a template kind label so kinds can be compared and grouped.
+    if not isinstance(kind, str):
+        return ""  # any non-string kind normalizes to the empty string
+    normalized = kind.strip().lower().replace("-", "_")  # trim, lower-case, treat "-" like "_"
+    if normalized in {"pageindex", "page_index", "knowledge_graph"}:
+        return "timeline"  # NOTE(review): all three kinds collapse to "timeline" — confirm this aliasing is intended
+    return normalized
+
+
 def _resolve_reference_metadata(req: dict, search_config: dict | None = None):
     return resolve_reference_metadata_preferences(req, search_config)
 
@@ -169,7 +193,9 @@ async def parse(tenant_id, dataset_id):
     if not e:
         return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
     if kb.pipeline_id:
-        return get_error_data_result(message="Datasets configured with an ingestion pipeline cannot be parsed with `/datasets/{dataset_id}/chunks`. Use `/documents/ingest` instead.", code=RetCode.ARGUMENT_ERROR)
+        return get_error_data_result(
+            message="Datasets configured with an ingestion pipeline cannot be parsed with `/datasets/{dataset_id}/chunks`. Use `/documents/ingest` instead.", code=RetCode.ARGUMENT_ERROR
+        )
     req = await get_request_json()
     if not req.get("document_ids"):
         return get_error_data_result("`document_ids` is required")
@@ -363,9 +389,19 @@ async def retrieval_test(tenant_id):
             question += await keyword_extraction(LLMBundle(kb.tenant_id, chat_model_config), question)
 
         ranks = await settings.retriever.retrieval(
-            question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold,
-            vector_similarity_weight, top, doc_ids, rerank_mdl=rerank_mdl,
-            highlight=highlight, rank_feature=label_question(question, kbs),
+            question,
+            embd_mdl,
+            tenant_ids,
+            kb_ids,
+            page,
+            size,
+            similarity_threshold,
+            vector_similarity_weight,
+            top,
+            doc_ids,
+            rerank_mdl=rerank_mdl,
+            highlight=highlight,
+            rank_feature=label_question(question, kbs),
         )
         if toc_enhance:
             chat_model_config = get_tenant_default_model_by_type(kb.tenant_id, LLMType.CHAT)
@@ -421,13 +457,17 @@ async def list_chunks(tenant_id, dataset_id, document_id):
     page = int(req.get("page", 1))
     size = validate_rest_api_page_size(int(req.get("page_size", 30)))
     question = req.get("keywords", "")
+    chunk_ids = _get_query_id_list(req, "chunk_ids")
     query = {
         "doc_ids": [document_id],
         "page": page,
         "size": size,
         "question": question,
         "sort": True,
+        "must_not": {"exists": "compile_kwd"},
     }
+    if chunk_ids:
+        query["id"] = chunk_ids
     if "available" in req:
         query["available_int"] = 1 if req["available"] == "true" else 0
 
@@ -438,6 +478,8 @@ async def list_chunks(tenant_id, dataset_id, document_id):
             return get_result(message=f"Chunk not found: {dataset_id}/{req.get('id')}", code=RetCode.DATA_ERROR)
         if str(chunk.get("doc_id", chunk.get("document_id"))) != str(document_id):
             return get_result(message=f"Chunk not found: {dataset_id}/{req.get('id')}", code=RetCode.DATA_ERROR)
+        if chunk.get("compile_kwd"):
+            return get_result(message=f"Chunk not found: {dataset_id}/{req.get('id')}", code=RetCode.DATA_ERROR)
         _strip_chunk_runtime_fields(chunk)
         res["total"] = 1
         final_chunk = {
@@ -468,11 +510,7 @@ async def list_chunks(tenant_id, dataset_id, document_id):
         for chunk_id in sres.ids:
             d = {
                 "id": chunk_id,
-                "content": (
-                    remove_redundant_spaces(sres.highlight[chunk_id])
-                    if question and chunk_id in sres.highlight
-                    else sres.field[chunk_id].get("content_with_weight", "")
-                ),
+                "content": (remove_redundant_spaces(sres.highlight[chunk_id]) if question and chunk_id in sres.highlight else sres.field[chunk_id].get("content_with_weight", "")),
                 "document_id": sres.field[chunk_id]["doc_id"],
                 "docnm_kwd": sres.field[chunk_id]["docnm_kwd"],
                 "important_keywords": sres.field[chunk_id].get("important_kwd", []),
@@ -506,6 +544,8 @@ async def get_chunk(tenant_id, dataset_id, document_id, chunk_id):
         chunk = settings.docStoreConn.get(chunk_id, search.index_name(dataset_tenant_id), [dataset_id])
         if chunk is None or str(chunk.get("doc_id", chunk.get("document_id"))) != str(document_id):
             return get_result(data=False, message="Chunk not found!", code=RetCode.DATA_ERROR)
+        if chunk.get("compile_kwd"):
+            return get_result(data=False, message="Chunk not found!", code=RetCode.DATA_ERROR)
         return get_result(data=_strip_chunk_runtime_fields(chunk))
     except Exception as e:
         if str(e).find("NotFoundError") >= 0:
@@ -513,6 +553,292 @@ async def get_chunk(tenant_id, dataset_id, document_id, chunk_id):
         return server_error_response(e)
 
 
+@manager.route("/datasets/<dataset_id>/documents/<document_id>/structure/graph", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def get_document_structure_graph(tenant_id, dataset_id, document_id):
+    """Return per-template structure graphs for a document.
+
+    Response shape::
+
+        {
+          "templates": [
+            {
+              "template_id": "<id> | 'legacy:<compile_kwd>' | 'raptor'",
+              "template_name": "<display name>",
+              "kind": "list | set | hypergraph | timeline | page_index | …",
+              "entities": [...],
+              "relations": [...]
+            },
+            ...
+          ]
+        }
+
+    Rows that pre-date the ``compilation_template_ids`` stamp are surfaced
+    under a synthetic ``legacy:<compile_kwd>`` bucket so an in-flight
+    migration doesn't drop their data on the floor; the RAPTOR graph row gets
+    its own ``raptor`` bucket. Buckets with no entities AND no relations are omitted.
+    """
+    from rag.nlp import search
+    from api.db.services.compilation_template_group_service import CompilationTemplateGroupService
+
+    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
+        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
+    dataset_tenant_id = _get_dataset_tenant_id(dataset_id)
+    if not dataset_tenant_id:
+        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
+    docs = DocumentService.query(id=document_id, kb_id=dataset_id)
+    if not docs:
+        return get_error_data_result(message=f"You don't own the document {document_id}.")
+
+    # Resolve the doc's configured template group → child template ids
+    # so we can render tabs in the order the user picked them.
+    # Artifacts-kind templates render on the dataset Artifact tab, not
+    # here, so they're filtered out.
+    parser_config = docs[0].parser_config or {}
+
+    def _group_ids(raw) -> list[str]:  # accept one id or a list; return stripped, de-duplicated ids in order
+        if isinstance(raw, str):
+            raw = [raw]
+        if not isinstance(raw, list):
+            return []
+        ids: list[str] = []
+        seen: set[str] = set()
+        for gid in raw:
+            if not isinstance(gid, str):
+                continue
+            gid = gid.strip()
+            if gid and gid not in seen:
+                seen.add(gid)
+                ids.append(gid)
+        return ids
+
+    group_ids: list[str] = []
+    if isinstance(parser_config, dict):
+        if "compilation_template_group_id" in parser_config:  # a top-level key wins over the one nested under "ext"
+            group_ids = _group_ids(parser_config.get("compilation_template_group_id"))
+        elif isinstance(parser_config.get("ext"), dict):
+            group_ids = _group_ids(parser_config["ext"].get("compilation_template_group_id"))
+
+    configured_ids: list[str] = []
+    seen_configured_ids: set[str] = set()
+    template_meta: dict[str, dict] = {}
+    template_meta_by_kind: dict[str, list[dict]] = {}
+    for group_id in group_ids:
+        group = CompilationTemplateGroupService.get_saved(group_id, tenant_id)
+        if not group:
+            continue
+        for template in group.get("templates") or []:
+            if not isinstance(template, dict):
+                continue
+            template_id = str(template.get("id") or "").strip()
+            if not template_id or template_id in seen_configured_ids:
+                continue
+            config = template.get("config") if isinstance(template.get("config"), dict) else {}
+            raw_kind = (config.get("kind") if isinstance(config, dict) else "") or template.get("kind") or ""
+            kind_norm = _compilation_template_kind(raw_kind)
+            if kind_norm == "artifacts":
+                continue
+            seen_configured_ids.add(template_id)
+            configured_ids.append(template_id)
+            meta = {
+                "template_id": template_id,
+                "template_name": template.get("name") or template_id,
+                "kind": raw_kind or kind_norm,
+                "kind_norm": kind_norm,
+            }
+            template_meta[template_id] = meta
+            template_meta_by_kind.setdefault(kind_norm, []).append(meta)
+
+    # Load every graph row for this doc in one shot. Each row corresponds
+    # to one (compile_kwd, template_id) tuple — written by
+    # ``_struct_upsert_graph_json``.
+    index_name = search.index_name(dataset_tenant_id)
+    fields = [
+        "content_with_weight",
+        "compile_kwd",
+        "compilation_template_ids",
+        "compilation_template_kind_kwd",
+    ]
+    try:
+        res = await thread_pool_exec(
+            settings.docStoreConn.search,
+            fields,
+            [],
+            {"doc_id": [document_id], "knowledge_graph_kwd": ["graph"]},
+            [],
+            OrderByExpr(),
+            0,
+            1000,
+            index_name,
+            [dataset_id],
+        )
+        rows = settings.docStoreConn.get_fields(res, fields)
+
+        # The RAPTOR graph row is identified by ``compile_kwd``
+        # alone — it intentionally doesn't carry ``knowledge_graph_kwd``
+        # (which belongs to the KG feature). Query it separately and
+        # union into the same bucket map below.
+        res_raptor = await thread_pool_exec(
+            settings.docStoreConn.search,
+            fields,
+            [],
+            {"doc_id": [document_id], "compile_kwd": ["raptor_graph"]},
+            [],
+            OrderByExpr(),
+            0,
+            16,
+            index_name,
+            [dataset_id],
+        )
+        raptor_rows = settings.docStoreConn.get_fields(res_raptor, fields)
+    except Exception as e:
+        return server_error_response(e)
+
+    # Merge the two field-maps so the grouping loop below treats them
+    # identically. Raptor rows clobber by id, which is fine — both
+    # sources produce stable per-row ids.
+    if raptor_rows:
+        rows = dict(rows or {})
+        rows.update(raptor_rows)
+
+    def _row_template_id(row: dict) -> str | None:  # first non-blank template id stamped on the row, else None
+        raw = row.get("compilation_template_ids")
+        if isinstance(raw, list):
+            for v in raw:
+                if isinstance(v, str) and v.strip():
+                    return v.strip()
+        if isinstance(raw, str) and raw.strip():
+            return raw.strip()
+        return None
+
+    # Group: template_id → {entities, relations, kind}
+    grouped: dict[str, dict] = {}
+    for row in (rows or {}).values():
+        graph = {}
+        try:
+            graph = json.loads(row.get("content_with_weight") or "{}")
+        except Exception:
+            continue  # rows whose payload fails to parse as JSON are skipped silently
+        if not isinstance(graph, dict):
+            continue
+        entities = graph.get("entities") or []
+        relations = graph.get("relations") or []
+        if not entities and not relations:
+            continue
+
+        tid = _row_template_id(row)
+        compile_kwd_val = row.get("compile_kwd") or ""
+        kind_val = row.get("compilation_template_kind_kwd") or compile_kwd_val
+
+        # The RAPTOR graph row has no ``compilation_template_ids`` (it
+        # isn't derived from a user-authored template). Treat it as its
+        # own first-class bucket, not a legacy fallback.
+        is_raptor = compile_kwd_val == "raptor_graph"
+
+        if tid:
+            bucket_id = tid
+            row_kind_norm = _compilation_template_kind(kind_val)
+            meta = template_meta.get(bucket_id)
+            if not meta:  # id not configured: borrow name/kind from the same-kind template, only if exactly one exists
+                kind_matches = template_meta_by_kind.get(row_kind_norm) or []
+                if len(kind_matches) == 1:
+                    meta = kind_matches[0]
+            bucket_name = (meta or {}).get("template_name") or bucket_id
+            bucket_kind = (meta or {}).get("kind") or kind_val
+        elif is_raptor:
+            bucket_id = "raptor"
+            bucket_name = "RAPTOR Summary"
+            bucket_kind = "raptor"
+        else:
+            # Legacy row: synthesize a stable id keyed by compile_kwd so
+            # multiple legacy kinds (e.g. ``list`` + ``hypergraph``) on
+            # the same doc surface as separate tabs.
+            bucket_id = f"legacy:{compile_kwd_val}"
+            bucket_name = f"Legacy ({compile_kwd_val})"
+            bucket_kind = kind_val
+
+        if bucket_id not in grouped:
+            grouped[bucket_id] = {
+                "template_id": bucket_id,
+                "template_name": bucket_name,
+                "kind": bucket_kind,
+                "entities": [],
+                "relations": [],
+            }
+        grouped[bucket_id]["entities"].extend(entities)
+        grouped[bucket_id]["relations"].extend(relations)
+
+    # Order: configured templates first (in the user's chosen order), then
+    # every remaining bucket (RAPTOR, legacy, unconfigured ids) in grouping order.
+    ordered_ids: list[str] = []
+    for tid in configured_ids:
+        if tid in grouped and tid not in ordered_ids:
+            ordered_ids.append(tid)
+    for bucket_id in grouped.keys():
+        if bucket_id not in ordered_ids:
+            ordered_ids.append(bucket_id)
+
+    templates_out = [grouped[bid] for bid in ordered_ids if grouped[bid]["entities"] or grouped[bid]["relations"]]
+    return get_result(data={"templates": templates_out})
+
+
+@manager.route("/datasets/<dataset_id>/documents/<document_id>/structure/graph", methods=["DELETE"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def delete_document_structure_graph(tenant_id, dataset_id, document_id):
+    """Delete one structure-graph tab for a document.
+
+    Request body::
+
+        {"template_id": "<template id> | legacy:<compile_kwd> | raptor"}
+
+    Template-backed structure tabs remove both the compact graph row and
+    the underlying entity/relation rows. RAPTOR only removes the graph
+    projection row so summary chunks remain available for retrieval.
+    """
+    from rag.nlp import search
+
+    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
+        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
+    dataset_tenant_id = _get_dataset_tenant_id(dataset_id)
+    if not dataset_tenant_id:
+        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
+    docs = DocumentService.query(id=document_id, kb_id=dataset_id)
+    if not docs:
+        return get_error_data_result(message=f"You don't own the document {document_id}.")
+
+    req = await get_request_json()
+    template_id = str(req.get("template_id") or "").strip()
+    if not template_id:
+        return get_error_data_result(message="`template_id` is required")
+
+    index_name = search.index_name(dataset_tenant_id)
+
+    def _delete(condition: dict) -> int:  # called directly below, not via thread_pool_exec — NOTE(review): confirm that is acceptable if the call blocks
+        return settings.docStoreConn.delete(condition, index_name, dataset_id)
+
+    try:
+        deleted = 0
+        if template_id == "raptor":  # RAPTOR: drop only the graph projection row, then return early
+            deleted += _delete({"doc_id": [document_id], "compile_kwd": ["raptor_graph"]})
+            return get_result(data={"deleted": deleted}, message=f"deleted {deleted} structure graph rows")
+
+        if template_id.startswith("legacy:"):
+            compile_kwd = template_id[len("legacy:") :].strip()
+            if not compile_kwd:
+                return get_error_data_result(message="`template_id` is invalid")
+            base_condition = {"doc_id": [document_id], "compile_kwd": [compile_kwd]}  # NOTE(review): does not exclude rows that also carry compilation_template_ids — confirm intended
+        else:
+            base_condition = {"doc_id": [document_id], "compilation_template_ids": [template_id]}
+
+        deleted += _delete({**base_condition, "knowledge_graph_kwd": ["graph"]})  # the compact graph row
+        deleted += _delete({**base_condition, "knowledge_graph_kwd": ["entity", "relation"]})  # the underlying entity/relation rows
+        return get_result(data={"deleted": deleted}, message=f"deleted {deleted} structure graph rows")
+    except Exception as e:
+        return server_error_response(e)
+
+
 @manager.route("/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["POST"])  # noqa: F821
 @login_required
 @add_tenant_id_to_kwargs
@@ -625,7 +951,11 @@ async def rm_chunk(tenant_id, dataset_id, document_id):
         if req.get("delete_all") is True:
             doc = docs[0]
             DocumentService.delete_chunk_images(doc, dataset_tenant_id)
-            chunk_number = settings.docStoreConn.delete({"doc_id": document_id}, search.index_name(dataset_tenant_id), dataset_id)
+            chunk_number = settings.docStoreConn.delete(
+                {"doc_id": document_id, "must_not": {"exists": "compile_kwd"}},
+                search.index_name(dataset_tenant_id),
+                dataset_id,
+            )
             if chunk_number != 0:
                 DocumentService.decrement_chunk_num(document_id, dataset_id, 1, chunk_number, 0)
             return get_result(message=f"deleted {chunk_number} chunks")
@@ -633,7 +963,7 @@ async def rm_chunk(tenant_id, dataset_id, document_id):
 
     unique_chunk_ids, duplicate_messages = check_duplicate_ids(chunk_ids, "chunk")
     chunk_number = settings.docStoreConn.delete(
-        {"doc_id": document_id, "id": unique_chunk_ids},
+        {"doc_id": document_id, "id": unique_chunk_ids, "must_not": {"exists": "compile_kwd"}},
         search.index_name(dataset_tenant_id),
         dataset_id,
     )
@@ -758,6 +1088,7 @@ async def switch_chunks(tenant_id, dataset_id, document_id):
     available_int = int(req["available_int"]) if "available_int" in req else (1 if req.get("available") else 0)
 
     try:
+
         def _switch_sync():
             e, doc = DocumentService.get_by_id(document_id)
             if not e:
diff --git a/api/apps/restful_apis/compilation_template_api.py b/api/apps/restful_apis/compilation_template_api.py
new file mode 100644
index 0000000000..ceb7c7e7de
--- /dev/null
+++ b/api/apps/restful_apis/compilation_template_api.py
@@ -0,0 +1,54 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+from quart import Response
+
+from api.apps import current_user, login_required
+from api.apps.restful_apis.utils.compilation_template_validation import validate_template_payload
+from api.db.services.compilation_template_service import CompilationTemplateService
+from api.utils.api_utils import get_json_result, server_error_response
+
+
+_validate_template_payload = validate_template_payload
+
+
+@manager.route("/compilation_templates/builtins", methods=["GET"])  # noqa: F821
+@login_required
+def list_builtin_templates() -> Response:
+    """Built-in template palette — used as the per-child pre-fill in the
+    "Add template group" panel. Groups themselves are always user-created;
+    no builtin groups exist.
+    """
+    try:
+        templates = CompilationTemplateService.list_builtins()
+        if not templates:  # first lookup returned nothing: run seed_builtins_from_files() and look again
+            CompilationTemplateService.seed_builtins_from_files()
+            templates = CompilationTemplateService.list_builtins()
+        if not templates:  # still nothing after seeding: build the response directly from load_builtins_from_files()
+            templates = [
+                {
+                    "id": template["id"],
+                    "kind": template["kind"],
+                    "display_name": template["name"],  # the loaded entry's "name" is exposed as "display_name"
+                    "description": template.get("description", ""),
+                    "config": template["config"],
+                }
+                for template in CompilationTemplateService.load_builtins_from_files()
+            ]
+        templates = CompilationTemplateService.fill_default_llm_for_templates(templates, current_user.id)
+        return get_json_result(data=templates)
+    except Exception as exc:  # any failure is converted into the standard server-error response
+        return server_error_response(exc)
diff --git a/api/apps/restful_apis/compilation_template_group_api.py b/api/apps/restful_apis/compilation_template_group_api.py
new file mode 100644
index 0000000000..37f73b7247
--- /dev/null
+++ b/api/apps/restful_apis/compilation_template_group_api.py
@@ -0,0 +1,172 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+from quart import Response, request
+
+from api.apps import current_user, login_required
+from api.apps.restful_apis.utils.compilation_template_validation import validate_template_payload
+from api.db.services.compilation_template_group_service import (
+    CompilationTemplateGroupService,
+    GroupValidationError,
+)
+from api.utils.api_utils import (
+    get_data_error_result,
+    get_json_result,
+    get_request_json,
+    server_error_response,
+    validate_request,
+)
+from api.utils.pagination_utils import validate_rest_api_page_size
+
+
+_GROUP_NAME_MAX = 128  # compared against the UTF-8 byte length of the name
+_GROUP_DESCRIPTION_MAX = 1024  # compared against the character count of the description
+
+
+def _validate_group_payload(req: dict, require_all: bool = True) -> str:
+    """Return an error message for an invalid group payload, or "" when valid.
+
+    ``require_all`` additionally demands non-null ``name`` and ``templates``.
+    """
+    if require_all:
+        for key in ("name", "templates"):
+            if req.get(key) is None:  # an explicit JSON null counts as missing
+                return f"Missing required field: {key}."
+    name = req.get("name")
+    if name is not None:
+        if not isinstance(name, str) or not name.strip():
+            return "Invalid template group name."
+        if len(name.encode("utf-8")) > _GROUP_NAME_MAX:
+            return "Template group name is too long."
+    description = req.get("description")
+    if description is not None and (not isinstance(description, str) or len(description) > _GROUP_DESCRIPTION_MAX):
+        return "Invalid template group description."
+    templates = req.get("templates")
+    if templates is not None:
+        if not isinstance(templates, list) or not templates:
+            return "A template group must contain at least one template."
+        for child in templates:
+            if not isinstance(child, dict):
+                return "Invalid template entry in group."
+            if err := validate_template_payload(child, require_all=True):
+                return err
+    return ""
+
+
+@manager.route("/compilation_template_groups", methods=["GET"])  # noqa: F821
+@login_required
+def list_groups() -> Response:
+    """List the current user's saved template groups; sliced only when page and page_size are non-zero."""
+    args = request.args
+    # Reject non-numeric paging values with a data error instead of letting int() raise.
+    try:
+        page_number, raw_page_size = int(args.get("page", 0)), int(args.get("page_size", 0))
+    except (TypeError, ValueError):
+        return get_data_error_result(message="page and page_size must be integers")
+    items_per_page = validate_rest_api_page_size(raw_page_size)
+    desc = args.get("desc", "true").lower() != "false"
+    try:
+        groups = CompilationTemplateGroupService.list_saved(current_user.id, args.get("keywords", ""), args.get("scope", ""), args.get("orderby", "create_time"), desc)
+        page = groups[(page_number - 1) * items_per_page : page_number * items_per_page] if page_number and items_per_page else groups
+        return get_json_result(data={"groups": page, "total": len(groups)})
+    except Exception as exc:
+        return server_error_response(exc)
+
+
+@manager.route("/compilation_template_groups/<group_id>", methods=["GET"])  # noqa: F821
+@login_required
+def detail(group_id: str) -> Response:
+    """Return one saved template group of the current user, or a data error when none is found."""
+    try:
+        found = CompilationTemplateGroupService.get_saved(group_id, current_user.id)
+        missing = f"Cannot find compilation template group {group_id}."
+        return get_data_error_result(message=missing) if found is None else get_json_result(data=found)
+    except Exception as err:
+        return server_error_response(err)
+
+
+@manager.route("/compilation_template_groups", methods=["POST"])  # noqa: F821
+@login_required
+@validate_request("name", "templates")
+async def create() -> Response:
+    """Create a template group for the current user; data error on a bad payload or duplicate name."""
+    req = await get_request_json()
+    error = _validate_group_payload(req)
+    if error:
+        return get_data_error_result(message=error)
+    try:
+        # Kept inside the try so a failing lookup is reported via server_error_response.
+        name = req["name"].strip()
+        if CompilationTemplateGroupService.name_exists(current_user.id, name):
+            return get_data_error_result(message="Duplicated compilation template group name.")
+        saved = CompilationTemplateGroupService.create_group(
+            tenant_id=current_user.id,
+            name=name,
+            description=req.get("description") or "",  # a JSON null becomes an empty description
+            templates=req["templates"],
+        )
+        return get_json_result(data=saved)
+    except GroupValidationError as exc:
+        return get_data_error_result(message=str(exc))
+    except Exception as exc:
+        return server_error_response(exc)
+
+
+@manager.route("/compilation_template_groups/<group_id>", methods=["PUT"])  # noqa: F821
+@login_required
+async def update(group_id: str) -> Response:
+    """Update a saved template group of the current user from the JSON body.
+
+    Fields missing from the body are handed to the service as ``None``.
+    """
+    req = await get_request_json()
+    error = _validate_group_payload(req, require_all=False)
+    if error:
+        return get_data_error_result(message=error)
+    missing = f"Cannot find compilation template group {group_id}."
+    name = req.get("name")
+    name = name.strip() if isinstance(name, str) else None
+    try:
+        if CompilationTemplateGroupService.get_saved(group_id, current_user.id) is None:
+            return get_data_error_result(message=missing)
+        if name is not None and CompilationTemplateGroupService.name_exists(current_user.id, name, group_id):
+            return get_data_error_result(message="Duplicated compilation template group name.")
+        updated = CompilationTemplateGroupService.update_group(
+            group_id=group_id,
+            tenant_id=current_user.id,
+            name=name,
+            description=req.get("description"),
+            templates=req.get("templates"),
+        )
+        if updated is None:
+            return get_data_error_result(message=missing)
+        return get_json_result(data=updated)
+    except GroupValidationError as exc:
+        return get_data_error_result(message=str(exc))
+    except Exception as exc:
+        return server_error_response(exc)
+
+
+@manager.route("/compilation_template_groups/<group_id>", methods=["DELETE"])  # noqa: F821
+@login_required
+def delete(group_id: str) -> Response:
+    """Delete one template group of the current user; data error when the service reports no deletion."""
+    try:
+        if CompilationTemplateGroupService.delete_group(group_id, current_user.id):
+            return get_json_result(data=True)
+        return get_data_error_result(message=f"Cannot find compilation template group {group_id}.")
+    except Exception as err:
+        return server_error_response(err)
diff --git a/api/apps/restful_apis/dataset_api.py b/api/apps/restful_apis/dataset_api.py
index 480b949abf..0c4df25901 100644
--- a/api/apps/restful_apis/dataset_api.py
+++ b/api/apps/restful_apis/dataset_api.py
@@ -520,7 +520,7 @@ async def search(tenant_id, dataset_id):
     req, err = await validate_and_parse_json_request(request, SearchDatasetReq)
     if err is not None:
         return get_error_argument_result(err)
-    req['dataset_ids'] = [dataset_id]
+    req["dataset_ids"] = [dataset_id]
     try:
         success, result = await dataset_api_service.search_datasets(tenant_id, req)
         if success:
@@ -556,6 +556,263 @@ async def get_knowledge_graph(tenant_id, dataset_id):
         return get_error_data_result(message="Internal server error")
 
 
+@manager.route("/datasets/<dataset_id>/any_artifact", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def has_any_wiki(tenant_id, dataset_id):
+    """Report whether the dataset has any compiled artifact pages.
+
+    Route: GET /api/v1/datasets/<dataset_id>/any_artifact
+    Success body: {"code": 0, "data": {"has": bool}}
+    The frontend calls this to decide whether the dataset sidebar should
+    show the Artifact tab.
+    """
+    try:
+        ok, payload = await dataset_api_service.has_any_wiki(dataset_id, tenant_id)
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+@manager.route("/datasets/<dataset_id>/artifacts", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def list_wiki_pages(tenant_id, dataset_id):
+    """Return one page of artifact pages for the dataset Artifact tab.
+
+    Route: GET /api/v1/datasets/<dataset_id>/artifacts?page=1&page_size=200&page_type=entity
+    Success body: {"code": 0, "data": {"total": int, "items": [{slug, title, page_type}]}}
+    """
+    args = request.args
+    try:
+        page = int(args.get("page", 1) or 1)
+        page_size = int(args.get("page_size", 200) or 200)
+    except (TypeError, ValueError):
+        return get_error_argument_result("page and page_size must be integers")
+    page_type = args.get("page_type") or None
+    try:
+        ok, payload = await dataset_api_service.list_wiki_pages(
+            dataset_id,
+            tenant_id,
+            page=page,
+            page_size=page_size,
+            page_type=page_type,
+        )
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+@manager.route("/datasets/<dataset_id>/artifacts/graph", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def get_wiki_graph(tenant_id, dataset_id):
+    """Return an incremental slice of the dataset's canvas graph.
+
+    Route: GET /api/v1/datasets/<dataset_id>/artifacts/graph[?node=<slug>]
+    - without ``node``: an overview centred on the heaviest-weighted
+      entities, grown outward until ``MAX_LOADING_ENTITY`` is reached.
+    - with ``node``: the subgraph centred on that entity, with all of its
+      outgoing relations and their ``to`` targets (also capped).
+
+    Success body: ``{"code": 0, "data": {"entities":[…],"relations":[…]}}``.
+    """
+    try:
+        # A blank or whitespace-only ``node`` is treated the same as no node.
+        raw_node = request.args.get("node", None)
+        node = (raw_node.strip() or None) if isinstance(raw_node, str) else raw_node
+        ok, payload = await dataset_api_service.get_wiki_graph(
+            dataset_id,
+            tenant_id,
+            node=node,
+        )
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+@manager.route("/datasets/<dataset_id>/artifacts", methods=["DELETE"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def clear_wiki(tenant_id, dataset_id):
+    """Delete every artifact-related row stored in ES for this KB.
+
+    Route: DELETE /api/v1/datasets/<dataset_id>/artifacts
+    Covers the five ``compile_kwd`` row types written by the artifact
+    pipeline (MAP extracts / REDUCE results / PLAN / page drafts / pages).
+    Success body: {"code": 0, "data": {"deleted": {kwd: result}}}
+    On failure the service message is returned with
+    ``RetCode.AUTHENTICATION_ERROR``; unexpected exceptions are logged and
+    answered with a generic internal-error result.
+    """
+    try:
+        ok, payload = await dataset_api_service.clear_wiki(dataset_id, tenant_id)
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+@manager.route("/datasets/<dataset_id>/artifacts/<page_type>/<path:slug>", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def get_wiki_page(tenant_id, dataset_id, page_type, slug):
+    """Return a single artifact page identified by (page_type, slug).
+
+    Route: GET /api/v1/datasets/<dataset_id>/artifacts/<page_type>/<slug>
+    ``slug`` is everything after the page type (matched with the ``path``
+    converter, so it may contain "/"). It has the same form that markdown
+    links in ``content_md_rendered`` carry as
+    ``artifact/<kb_id>/<page_type>/<slug>``.
+
+    Success body: {"code": 0, "data": page_dict | null}
+    On failure the service message is returned with
+    ``RetCode.AUTHENTICATION_ERROR``; unexpected exceptions are logged and
+    answered with a generic internal-error result.
+    """
+    try:
+        ok, payload = await dataset_api_service.get_wiki_page(dataset_id, tenant_id, page_type, slug)
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+@manager.route("/datasets/<dataset_id>/any_skill", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def has_any_skill(tenant_id, dataset_id):
+    """Report whether the dataset has a compiled Corpus2Skill tree.
+
+    Route: GET /api/v1/datasets/<dataset_id>/any_skill
+    Success body: {"code": 0, "data": {"has": bool}}
+    """
+    try:
+        ok, payload = await dataset_api_service.has_any_skill(dataset_id, tenant_id)
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+@manager.route("/datasets/<dataset_id>/skills", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def get_skill_tree(tenant_id, dataset_id):
+    """Return the aggregate recursive Corpus2Skill tree of the dataset.
+
+    Route: GET /api/v1/datasets/<dataset_id>/skills
+    Success body: {"code": 0, "data": skill_all_row | null}
+    On failure the service message is returned with
+    ``RetCode.AUTHENTICATION_ERROR``; unexpected exceptions are logged and
+    answered with a generic internal-error result.
+    """
+    try:
+        ok, payload = await dataset_api_service.get_skill_tree(dataset_id, tenant_id)
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+@manager.route("/datasets/<dataset_id>/skills/<path:skill_kwd>", methods=["GET"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def get_skill_page(tenant_id, dataset_id, skill_kwd):
+    """Return the full markdown of one Corpus2Skill node, looked up by skill_kwd.
+
+    Route: GET /api/v1/datasets/<dataset_id>/skills/<skill_kwd>
+    ``skill_kwd`` is matched with the ``path`` converter, so it may contain "/".
+    Success body: {"code": 0, "data": skill_row | null}
+    On failure the service message is returned with
+    ``RetCode.AUTHENTICATION_ERROR``; unexpected exceptions are logged and
+    answered with a generic internal-error result.
+    """
+    try:
+        ok, payload = await dataset_api_service.get_skill_page(dataset_id, tenant_id, skill_kwd)
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
+# The two artifact-commit endpoints
+#   GET /datasets/<dataset_id>/artifacts/<page_type>/<path:slug>/commits
+#   GET /datasets/<dataset_id>/artifacts/commits/<commit_id>
+# were retired here — their functionality is now served by the generic
+# file-commit routes:
+#   GET /datasets/<dataset_id>/commits?slug=<page_type>/<name>
+#   GET /datasets/<dataset_id>/commits/<commit_id>
+# See ``api/apps/restful_apis/file_commit_api.py`` and
+# ``api/db/services/file_commit_service.py`` (record_page_edit /
+# list_page_commits / get_page_commit_detail).
+
+
+@manager.route("/datasets/<dataset_id>/artifacts/<page_type>/<path:slug>", methods=["PUT"])  # noqa: F821
+@login_required
+@add_tenant_id_to_kwargs
+async def update_wiki_page(tenant_id, dataset_id, page_type, slug):
+    """Overwrite the markdown of one artifact page in place.
+
+    Route: PUT /api/v1/datasets/<dataset_id>/artifacts/<page_type>/<slug>
+    Body: {"content_md": "<markdown>"}, optionally with "title" / "comments".
+
+    Only the page row changes; canvas / entity / relation rows stay stale
+    until the next artifact compile. On success the re-fetched page dict
+    is returned so the dialog can refresh its preview cleanly.
+    """
+    try:
+        body = await request.get_json()
+        if not isinstance(body, dict):
+            return get_error_argument_result("Body must be a JSON object.")
+        content_md = body.get("content_md")
+        if not isinstance(content_md, str):
+            return get_error_argument_result("'content_md' must be a string.")
+        # Optional commit metadata. A missing title defaults server-side to
+        # "Edit <slug>" inside record_edit.
+        title, comments = body.get("title"), body.get("comments")
+        if not (title is None or isinstance(title, str)):
+            return get_error_argument_result("'title' must be a string.")
+        if not (comments is None or isinstance(comments, str)):
+            return get_error_argument_result("'comments' must be a string.")
+        editor_id = getattr(current_user, "id", None)
+        ok, payload = await dataset_api_service.update_wiki_page(
+            dataset_id,
+            tenant_id,
+            page_type,
+            slug,
+            content_md,
+            user_id=editor_id,
+            title=title,
+            comments=comments,
+        )
+        if not ok:
+            return get_result(data=False, message=payload, code=RetCode.AUTHENTICATION_ERROR)
+        return get_result(data=payload)
+    except Exception as exc:
+        logging.exception(exc)
+        return get_error_data_result(message="Internal server error")
+
+
 @manager.route("/datasets/<dataset_id>/index", methods=["POST"])  # noqa: F821
 @login_required
 @add_tenant_id_to_kwargs
diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py
index b9eecc188f..541ce2850a 100644
--- a/api/apps/restful_apis/document_api.py
+++ b/api/apps/restful_apis/document_api.py
@@ -77,6 +77,58 @@ from common.ssrf_guard import assert_url_is_safe
 from rag.nlp import search
 
 
+def _parser_config_compilation_template_group_ids(parser_config) -> list[str]:
+    """Return the compilation template group ids stored in ``parser_config``.
+
+    The ids live under ``compilation_template_group_id``, either at the top
+    level or inside the nested ``ext`` dict; a top-level entry always wins.
+    The value may be a list or a legacy single string id. Non-string and
+    blank entries are dropped, the rest are stripped and de-duplicated in
+    first-seen order. Old ``compilation_template_ids`` data is intentionally
+    ignored per the migration spec.
+    """
+    key = "compilation_template_group_id"
+
+    def _clean(value) -> list[str]:
+        # Legacy shape: one id given as a bare string.
+        candidates = [value] if isinstance(value, str) else value
+        if not isinstance(candidates, list):
+            return []
+        # A dict keeps insertion order, so its keys give first-seen de-duplication.
+        unique: dict[str, None] = {}
+        for item in candidates:
+            text = item.strip() if isinstance(item, str) else ""
+            if text:
+                unique.setdefault(text, None)
+        return list(unique)
+
+    if isinstance(parser_config, dict):
+        if key in parser_config:
+            return _clean(parser_config.get(key))
+        nested = parser_config.get("ext")
+        if isinstance(nested, dict):
+            return _clean(nested.get(key))
+    # Not a dict, or no usable entry in either location.
+    return []
+
+
+def _compilation_template_group_id_changed(old_config, new_config) -> bool:  # True when the normalized group-id lists differ (order-sensitive list comparison)
+    return _parser_config_compilation_template_group_ids(old_config) != _parser_config_compilation_template_group_ids(new_config)
+
+
+def _normalize_parser_config_compilation_template_group_ids(parser_config) -> bool:
+    """Rewrite the group-id entries in place as one normalized list; True when the key was present."""
+    key = "compilation_template_group_id"
+    ext = parser_config.get("ext") if isinstance(parser_config, dict) else None
+    in_ext = isinstance(ext, dict) and key in ext
+    if not in_ext and not (isinstance(parser_config, dict) and key in parser_config):
+        return False
+    parser_config[key] = group_ids = _parser_config_compilation_template_group_ids(parser_config)
+    if in_ext:
+        ext[key] = group_ids
+    return True
+
+
 @manager.route("/documents/upload", methods=["POST"])  # noqa: F821
 @login_required
 @add_tenant_id_to_kwargs
@@ -233,9 +285,16 @@ async def update_document(tenant_id, dataset_id, document_id):
     if "parser_id" in req and ((doc.type == FileType.VISUAL and req["parser_id"] != "picture") or (re.search(r"\.(ppt|pptx|pages)$", doc.name) and req["parser_id"] != "presentation")):
         return get_data_error_result(message="Not supported yet!")
 
-    # parser config provided (already validated in UpdateDocumentReq), update it
+    parser_config_template_group_changed = False
+    # parser config provided (already validated in UpdateDocumentReq), update it.
+    # Changing the document-scoped knowledge compilation template group
+    # affects parse output, so the document must be parsed again for it to
+    # execute.
     if update_doc_req.parser_config:
+        old_parser_config = dict(doc.parser_config or {})
         req["parser_config"].update(update_doc_req.parser_config.ext)
+        parser_config_template_group_touched = _normalize_parser_config_compilation_template_group_ids(req["parser_config"])
+        parser_config_template_group_changed = parser_config_template_group_touched and _compilation_template_group_id_changed(old_parser_config, req["parser_config"])
         DocumentService.update_parser_config(doc.id, req["parser_config"])
 
     # pipeline_id provided - reset document for reparse
@@ -246,6 +305,12 @@ async def update_document(tenant_id, dataset_id, document_id):
     elif update_doc_req.chunk_method:
         if error := update_chunk_method(req, doc, tenant_id):
             return error
+        if parser_config_template_group_changed and doc.parser_id.lower() == req["chunk_method"].lower():
+            if error := reset_document_for_reparse(doc, tenant_id):
+                return error
+    elif parser_config_template_group_changed:
+        if error := reset_document_for_reparse(doc, tenant_id):
+            return error
 
     if "enabled" in req:  # already checked in UpdateDocumentReq - it's int if present
         # "enabled" flag provided, the update method will check if it's changed and then update if so
diff --git a/api/apps/restful_apis/file_commit_api.py b/api/apps/restful_apis/file_commit_api.py
index 99ab3edc4d..1882c8df5c 100644
--- a/api/apps/restful_apis/file_commit_api.py
+++ b/api/apps/restful_apis/file_commit_api.py
@@ -16,19 +16,11 @@
 
 import logging
 from functools import wraps
-
 from quart import request
-
 from api.apps import login_required, current_user
 from api.utils.api_utils import get_json_result, get_data_error_result, get_request_json, server_error_response, validate_request
-
-# manager is injected dynamically by api.apps.register_page() before this
-# module is exec'd. DO NOT assign manager = None here — it would overwrite
-# the Blueprint that register_page set on the module.
 from api.db.services.file_commit_service import FileCommitService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db.services.file_service import FileService
-from common.constants import FileSource
 
 logger = logging.getLogger(__name__)
 
@@ -45,12 +37,16 @@ def _register_resolver(entity_type):
     The decorated function receives (entity_id) and must return a folder_id
     or None if the entity has no corresponding folder.
     """
+
     def decorator(func):
         _ENTITY_RESOLVERS[entity_type] = func
+
         @wraps(func)
         def wrapper(entity_id):
             return func(entity_id)
+
         return wrapper
+
     return decorator
 
 
@@ -64,23 +60,29 @@ def _resolve_folder_id(entity_type, entity_id):
 
 @_register_resolver("datasets")
 def _resolve_dataset_folder(dataset_id):
-    success, kb = KnowledgebaseService.get_by_id(dataset_id)
+    """Resolve the commit folder id for the ``/datasets/<dataset_id>/commits`` scope.
+
+    This scope serves artifact-page history, not workspace file commits.
+    Artifact commits are written by :meth:`FileCommitService.record_page_edit`
+    with ``folder_id = kb_id``, so the dataset id itself is the folder id.
+    Workspace file-commit browsing for the same KB still works via
+    ``/workspace/*`` or ``/folders/*`` with the real folder id; the two commit
+    domains coexist in ``file_commit`` but never mix under one folder_id.
+
+    Returns ``None`` when the dataset does not exist, which makes
+    ``_resolve`` reject the request before any commit query runs.
+    NOTE(review): only existence is checked here, not tenant ownership —
+    confirm that access control is enforced elsewhere.
+    """
+    success, _kb = KnowledgebaseService.get_by_id(dataset_id)
     if not success:
         return None
-    # Find the folder with matching name, source_type, and tenant_id
-    folders = FileService.query(
-        name=kb.name,
-        source_type=FileSource.KNOWLEDGEBASE.value,
-        type="folder",
-        tenant_id=kb.tenant_id,
-    )
-    if folders:
-        return folders[0].id
-    return None
+    return dataset_id
 
 
 # ── Route registration helper ─────────────────────────────────────────────
 
+
 def _register_commit_routes(prefix, param_name, resolver_type=None):
     """Register all 8 commit endpoints for a given URL prefix.
 
@@ -102,7 +104,7 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
         return folder_id
 
     # ── Create commit ──────────────────────────────────────────────────────
-    @manager.route(f'{prefix}/commits', methods=['POST'], endpoint=f'create_commit_{_n}')  # noqa: F821
+    @manager.route(f"{prefix}/commits", methods=["POST"], endpoint=f"create_commit_{_n}")  # noqa: F821
     @login_required
     @validate_request("message", "files")
     async def create_commit(entity_id):
@@ -115,21 +117,27 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
                 message=req["message"],
                 file_changes=req["files"],
             )
-            return get_json_result(data={
-                "id": commit.id,
-                "folder_id": commit.folder_id,
-                "parent_id": commit.parent_id,
-                "message": commit.message,
-                "author_id": commit.author_id,
-                "file_count": commit.file_count,
-                "tree_state": commit.tree_state,
-                "create_time": commit.create_time,
-            })
+            return get_json_result(
+                data={
+                    "id": commit.id,
+                    "folder_id": commit.folder_id,
+                    "parent_id": commit.parent_id,
+                    "message": commit.message,
+                    "author_id": commit.author_id,
+                    "file_count": commit.file_count,
+                    "tree_state": commit.tree_state,
+                    "create_time": commit.create_time,
+                }
+            )
         except Exception as e:
             return server_error_response(e)
 
     # ── List commits ───────────────────────────────────────────────────────
-    @manager.route(f'{prefix}/commits', methods=['GET'], endpoint=f'list_commits_{_n}')  # noqa: F821
+    # Accepts an optional ``?slug=<page_type>/<name>`` filter to serve
+    # per-artifact-page history. When ``slug`` is set we delegate to
+    # ``list_page_commits`` (indexed join on FileCommitItem.slug_kwd);
+    # otherwise this is the plain folder-wide commit list.
+    @manager.route(f"{prefix}/commits", methods=["GET"], endpoint=f"list_commits_{_n}")  # noqa: F821
     @login_required
     async def list_commits(entity_id):
         folder_id = _resolve(entity_id)
@@ -138,26 +146,57 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
             page_size = int(request.args.get("page_size", 15))
             order_by = request.args.get("order_by", "create_time")
             desc = request.args.get("desc", "true").lower() != "false"
+            slug = request.args.get("slug") or ""
+
+            if slug:
+                total, items = FileCommitService.list_page_commits(
+                    tenant_id="",  # scoped implicitly via folder_id == kb_id
+                    kb_id=folder_id,
+                    slug=slug,
+                    page=page,
+                    page_size=page_size,
+                )
+                return get_json_result(
+                    data={
+                        "total": total,
+                        "page": page,
+                        "page_size": page_size,
+                        "commits": items,
+                    }
+                )
+
             commits, total = FileCommitService.list_commits(folder_id, page, page_size, order_by, desc)
-            return get_json_result(data={
-                "total": total,
-                "page": page,
-                "page_size": page_size,
-                "commits": [{
-                    "id": c.id,
-                    "folder_id": c.folder_id,
-                    "parent_id": c.parent_id,
-                    "message": c.message,
-                    "author_id": c.author_id,
-                    "file_count": c.file_count,
-                    "create_time": c.create_time,
-                } for c in commits],
-            })
+            return get_json_result(
+                data={
+                    "total": total,
+                    "page": page,
+                    "page_size": page_size,
+                    "commits": [
+                        {
+                            "id": c.id,
+                            "folder_id": c.folder_id,
+                            "parent_id": c.parent_id,
+                            "message": c.message,
+                            "author_id": c.author_id,
+                            "file_count": c.file_count,
+                            "create_time": c.create_time,
+                            # Artifact-commit extension — null for workspace commits.
+                            "title": getattr(c, "title", None),
+                            "comments": getattr(c, "comments", None),
+                        }
+                        for c in commits
+                    ],
+                }
+            )
         except Exception as e:
             return server_error_response(e)
 
     # ── Get commit ─────────────────────────────────────────────────────────
-    @manager.route(f'{prefix}/commits/<commit_id>', methods=['GET'], endpoint=f'get_commit_{_n}')  # noqa: F821
+    # For artifact commits (folder_id == kb_id) we route through
+    # ``get_page_commit_detail`` which resolves ``content_after`` from
+    # the configured blob store and returns the flat shape the old
+    # ``/artifacts/commits/<id>`` endpoint used to serve.
+    @manager.route(f"{prefix}/commits/<commit_id>", methods=["GET"], endpoint=f"get_commit_{_n}")  # noqa: F821
     @login_required
     async def get_commit(entity_id, commit_id):
         folder_id = _resolve(entity_id)
@@ -167,29 +206,47 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
                 return get_data_error_result("Commit not found")
             if commit.folder_id != folder_id:
                 return get_data_error_result("Commit not found in workspace")
+
+            # Artifact commits carry a non-empty ``title``; its truthiness is
+            # the discriminator used to pick the enriched response shape.
+            if getattr(commit, "title", None):
+                detail = FileCommitService.get_page_commit_detail(
+                    tenant_id="",
+                    kb_id=folder_id,
+                    commit_id=commit_id,
+                )
+                if detail is None:
+                    return get_data_error_result("Commit not found")
+                return get_json_result(data=detail)
+
             items = FileCommitService.list_commit_files(commit_id)
-            return get_json_result(data={
-                "id": commit.id,
-                "folder_id": commit.folder_id,
-                "parent_id": commit.parent_id,
-                "message": commit.message,
-                "author_id": commit.author_id,
-                "file_count": commit.file_count,
-                "create_time": commit.create_time,
-                "files": [{
-                    "file_id": item.file_id,
-                    "operation": item.operation,
-                    "old_hash": item.old_hash,
-                    "new_hash": item.new_hash,
-                    "old_name": item.old_name,
-                    "new_name": item.new_name,
-                } for item in items],
-            })
+            return get_json_result(
+                data={
+                    "id": commit.id,
+                    "folder_id": commit.folder_id,
+                    "parent_id": commit.parent_id,
+                    "message": commit.message,
+                    "author_id": commit.author_id,
+                    "file_count": commit.file_count,
+                    "create_time": commit.create_time,
+                    "files": [
+                        {
+                            "file_id": item.file_id,
+                            "operation": item.operation,
+                            "old_hash": item.old_hash,
+                            "new_hash": item.new_hash,
+                            "old_name": item.old_name,
+                            "new_name": item.new_name,
+                        }
+                        for item in items
+                    ],
+                }
+            )
         except Exception as e:
             return server_error_response(e)
 
     # ── List commit files ──────────────────────────────────────────────────
-    @manager.route(f'{prefix}/commits/<commit_id>/files', methods=['GET'], endpoint=f'list_commit_files_{_n}')  # noqa: F821
+    @manager.route(f"{prefix}/commits/<commit_id>/files", methods=["GET"], endpoint=f"list_commit_files_{_n}")  # noqa: F821
     @login_required
     async def list_commit_files(entity_id, commit_id):
         folder_id = _resolve(entity_id)
@@ -200,22 +257,27 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
             if commit.folder_id != folder_id:
                 return get_data_error_result("Commit not found in workspace")
             items = FileCommitService.list_commit_files(commit_id)
-            return get_json_result(data=[{
-                "id": item.id,
-                "file_id": item.file_id,
-                "operation": item.operation,
-                "old_hash": item.old_hash,
-                "new_hash": item.new_hash,
-                "old_location": item.old_location,
-                "new_location": item.new_location,
-                "old_name": item.old_name,
-                "new_name": item.new_name,
-            } for item in items])
+            return get_json_result(
+                data=[
+                    {
+                        "id": item.id,
+                        "file_id": item.file_id,
+                        "operation": item.operation,
+                        "old_hash": item.old_hash,
+                        "new_hash": item.new_hash,
+                        "old_location": item.old_location,
+                        "new_location": item.new_location,
+                        "old_name": item.old_name,
+                        "new_name": item.new_name,
+                    }
+                    for item in items
+                ]
+            )
         except Exception as e:
             return server_error_response(e)
 
     # ── Diff commits ───────────────────────────────────────────────────────
-    @manager.route(f'{prefix}/commits/diff', methods=['GET'], endpoint=f'diff_commits_{_n}')  # noqa: F821
+    @manager.route(f"{prefix}/commits/diff", methods=["GET"], endpoint=f"diff_commits_{_n}")  # noqa: F821
     @login_required
     async def diff_commits(entity_id):
         folder_id = _resolve(entity_id)
@@ -236,7 +298,7 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
             return server_error_response(e)
 
     # ── Get uncommitted changes ────────────────────────────────────────────
-    @manager.route(f'{prefix}/changes', methods=['GET'], endpoint=f'get_uncommitted_changes_{_n}')  # noqa: F821
+    @manager.route(f"{prefix}/changes", methods=["GET"], endpoint=f"get_uncommitted_changes_{_n}")  # noqa: F821
     @login_required
     async def get_uncommitted_changes(entity_id):
         folder_id = _resolve(entity_id)
@@ -247,7 +309,7 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
             return server_error_response(e)
 
     # ── Get commit tree ────────────────────────────────────────────────────
-    @manager.route(f'{prefix}/commits/<commit_id>/tree', methods=['GET'], endpoint=f'get_commit_tree_{_n}')  # noqa: F821
+    @manager.route(f"{prefix}/commits/<commit_id>/tree", methods=["GET"], endpoint=f"get_commit_tree_{_n}")  # noqa: F821
     @login_required
     async def get_commit_tree(entity_id, commit_id):
         folder_id = _resolve(entity_id)
@@ -263,7 +325,7 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
             return server_error_response(e)
 
     # ── Get commit file content ────────────────────────────────────────────
-    @manager.route(f'{prefix}/commits/<commit_id>/files/<file_id>/content', methods=['GET'], endpoint=f'get_commit_file_content_{_n}')  # noqa: F821
+    @manager.route(f"{prefix}/commits/<commit_id>/files/<file_id>/content", methods=["GET"], endpoint=f"get_commit_file_content_{_n}")  # noqa: F821
     @login_required
     async def get_commit_file_content(entity_id, commit_id, file_id):
         folder_id = _resolve(entity_id)
@@ -282,14 +344,15 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
 
     # Expose handlers at module level for direct testing.
     _g = globals()
-    _g['create_commit'] = create_commit
-    _g['list_commits'] = list_commits
-    _g['get_commit'] = get_commit
-    _g['list_commit_files'] = list_commit_files
-    _g['diff_commits'] = diff_commits
-    _g['get_uncommitted_changes'] = get_uncommitted_changes
-    _g['get_commit_tree'] = get_commit_tree
-    _g['get_commit_file_content'] = get_commit_file_content
+    _g["create_commit"] = create_commit
+    _g["list_commits"] = list_commits
+    _g["get_commit"] = get_commit
+    _g["list_commit_files"] = list_commit_files
+    _g["diff_commits"] = diff_commits
+    _g["get_uncommitted_changes"] = get_uncommitted_changes
+    _g["get_commit_tree"] = get_commit_tree
+    _g["get_commit_file_content"] = get_commit_file_content
+
 
 # ── Register routes for all entity types ──────────────────────────────────
 # All URL patterns use <entity_id> as the consistent param name.
@@ -297,14 +360,14 @@ def _register_commit_routes(prefix, param_name, resolver_type=None):
 # For other entity types entity_id is resolved via _resolve_folder_id().
 # Register datasets first, workspace second, folders last —
 # the last call's handlers overwrite module-level names for test access.
-_register_commit_routes('/datasets/<entity_id>', 'entity_id', resolver_type='datasets')
-_register_commit_routes('/workspace/<entity_id>', 'entity_id')  # alias — workspace_id == folder_id
-_register_commit_routes('/folders/<entity_id>', 'entity_id')  # direct — entity_id == folder_id (wins)
+_register_commit_routes("/datasets/<entity_id>", "entity_id", resolver_type="datasets")
+_register_commit_routes("/workspace/<entity_id>", "entity_id")  # alias — workspace_id == folder_id
+_register_commit_routes("/folders/<entity_id>", "entity_id")  # direct — entity_id == folder_id (wins)
 # /memories and /skills routes are not mounted until resolvers are implemented.
 
 
 # ── File version history (shared across all entity types) ─────────────────
-@manager.route('/files/<file_id>/versions', methods=['GET'])  # noqa: F821
+@manager.route("/files/<file_id>/versions", methods=["GET"])  # noqa: F821
 @login_required
 async def get_file_version_history(file_id):
     try:
diff --git a/api/apps/restful_apis/utils/compilation_template_validation.py b/api/apps/restful_apis/utils/compilation_template_validation.py
new file mode 100644
index 0000000000..0ee378fcc8
--- /dev/null
+++ b/api/apps/restful_apis/utils/compilation_template_validation.py
@@ -0,0 +1,119 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+
+def _section_fields(config: dict, section: str):
+    """Return the rows of ``config[section]["fields"]``, or ``None`` if malformed.
+
+    A missing/empty section or ``fields`` yields ``[]``. A section that is not
+    a dict, or ``fields`` that is not a list, yields ``None`` so the caller can
+    reject the payload. Non-dict rows are replaced by ``{}`` so that they fail
+    the caller's "required" checks instead of raising ``AttributeError``.
+    """
+    block = config.get(section) or {}
+    if not isinstance(block, dict):
+        return None
+    fields = block.get("fields") or []
+    if not isinstance(fields, list):
+        return None
+    return [row if isinstance(row, dict) else {} for row in fields]
+
+
+def _text(row: dict, key: str) -> str:
+    """Return ``row[key]`` as a string, mapping missing/falsy values to ``""``."""
+    return str(row.get(key) or "")
+
+
+def validate_template_payload(req: dict, require_all: bool = True) -> str:
+    """Validate a single template payload (kind + config + name).
+
+    Args:
+        req: The request payload.
+        require_all: When true, ``name``, ``kind`` and ``config`` must all be
+            present as keys; when false (partial update) absent keys are skipped.
+
+    Returns:
+        The first validation error message, or ``""`` when the payload is valid.
+        A ``config`` section that is not a dict, or whose ``fields`` is not a
+        list, is reported as "Invalid template config." rather than raising.
+    """
+    for key in ["name", "kind", "config"] if require_all else []:
+        if key not in req:
+            return f"Missing required field: {key}."
+
+    name = req.get("name")
+    # The name limit is measured in UTF-8 bytes, not characters.
+    if name is not None and (not isinstance(name, str) or not name.strip() or len(name.encode("utf-8")) > 128):
+        return "Invalid template name."
+
+    description = req.get("description")
+    if description is not None and (not isinstance(description, str) or len(description) > 1024):
+        return "Invalid template description."
+
+    kind = req.get("kind")
+    if kind is not None and (not isinstance(kind, str) or not kind):
+        return "Invalid template kind."
+
+    config = req.get("config")
+    if config is None:
+        return ""
+    if not isinstance(config, dict):
+        return "Invalid template config."
+    if len(str(config.get("global_rules") or "")) > 4096:
+        return "Global compilation rules is too long."
+
+    for section in ["entity", "relation"]:
+        rows = _section_fields(config, section)
+        if rows is None:
+            return "Invalid template config."
+        label = section.capitalize()
+        seen_types = set()
+        for row in rows:
+            field_type = _text(row, "type").strip()
+            if not field_type:
+                return f"{label} type is required."
+            if field_type in seen_types:
+                return f"{label} type can not be duplicated."
+            seen_types.add(field_type)
+            if not _text(row, "description").strip():
+                return f"{label} field description is required."
+            if len(_text(row, "description")) > 1024:
+                return f"{label} field description is too long."
+            if len(_text(row, "rule")) > 1024:
+                return f"{label} field rule is too long."
+
+    # Claim/concept rows are only validated for "artifacts" templates.
+    if config.get("kind") != "artifacts" and req.get("kind") != "artifacts":
+        return ""
+    artifact_rules = {
+        "claim": (("statement", "Claim statement"), ("subject", "Claim subject")),
+        "concept": (("term", "Concept term"), ("definition_excerpt", "Concept definition excerpt")),
+    }
+    for section, keys in artifact_rules.items():
+        rows = _section_fields(config, section)
+        if rows is None:
+            return "Invalid template config."
+        for row in rows:
+            # Presence of every key is checked before any length limit, which
+            # fixes the order in which errors are reported for a row.
+            for key, label in keys:
+                if not _text(row, key).strip():
+                    return f"{label} is required."
+            for key, label in keys:
+                if len(_text(row, key)) > 1024:
+                    return f"{label} is too long."
+
+    return ""
diff --git a/api/apps/services/dataset_api_service.py b/api/apps/services/dataset_api_service.py
index 78c7d89daf..62ae708562 100644
--- a/api/apps/services/dataset_api_service.py
+++ b/api/apps/services/dataset_api_service.py
@@ -31,25 +31,32 @@ from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID, TaskService
 from api.db.services.user_service import TenantService, UserService, UserTenantService
 from common.constants import FileSource, StatusEnum
 from api.utils.api_utils import deep_merge, get_parser_config, remap_dictionary_keys, verify_embedding_availability
+from common.misc_utils import thread_pool_exec
 
-_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}
+_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap", "artifact", "skill"}
 
 _INDEX_TYPE_TO_TASK_TYPE = {
     "graph": "graphrag",
     "raptor": "raptor",
     "mindmap": "mindmap",
+    "artifact": "artifact",
+    "skill": "skill",
 }
 
 _INDEX_TYPE_TO_TASK_ID_FIELD = {
     "graph": "graphrag_task_id",
     "raptor": "raptor_task_id",
     "mindmap": "mindmap_task_id",
+    "artifact": "artifact_task_id",
+    "skill": "skill_task_id",
 }
 
 _INDEX_TYPE_TO_DISPLAY_NAME = {
     "graph": "Graph",
     "raptor": "RAPTOR",
     "mindmap": "Mindmap",
+    "artifact": "Artifact",
+    "skill": "Skill",
 }
 
 
@@ -155,8 +162,7 @@ async def delete_datasets(tenant_id: str, ids: list = None, delete_all: bool = F
                 # A missing row usually means stale/partial data (e.g. link removed earlier,
                 # failed post-insert file linkage, or legacy rows). Deletion still proceeds.
                 logging.warning(
-                    "delete_datasets: document %s in dataset %s has no File2Document row; "
-                    "skipping linked file delete",
+                    "delete_datasets: document %s in dataset %s has no File2Document row; skipping linked file delete",
                     doc.id,
                     kb_id,
                 )
@@ -459,8 +465,8 @@ def delete_knowledge_graph(dataset_id: str, tenant_id: str):
     _, kb = KnowledgebaseService.get_by_id(dataset_id)
     from rag.nlp import search
     from rag.graphrag.phase_markers import clear_phase_markers
-    settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation", "community_report"]},
-                                 search.index_name(kb.tenant_id), dataset_id)
+
+    settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation", "community_report"]}, search.index_name(kb.tenant_id), dataset_id)
     # Wiping the graph invalidates any phase-completion markers used to
     # short-circuit resolution / community detection on resume.
     clear_phase_markers(dataset_id)
@@ -842,8 +848,8 @@ def delete_index(dataset_id: str, tenant_id: str, index_type: str, wipe: bool =
     if wipe and index_type == "graph":
         from rag.nlp import search
         from rag.graphrag.phase_markers import clear_phase_markers
-        settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation", "community_report"]},
-                                     search.index_name(kb.tenant_id), dataset_id)
+
+        settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation", "community_report"]}, search.index_name(kb.tenant_id), dataset_id)
         # Wiping the graph invalidates any phase-completion markers used to
         # short-circuit resolution / community detection on resume.
         clear_phase_markers(dataset_id)
@@ -852,6 +858,10 @@ def delete_index(dataset_id: str, tenant_id: str, index_type: str, wipe: bool =
         from rag.nlp import search
 
         settings.docStoreConn.delete({"raptor_kwd": ["raptor"]}, search.index_name(kb.tenant_id), dataset_id)
+    elif wipe and index_type == "skill":
+        from rag.nlp import search
+
+        settings.docStoreConn.delete({"compile_kwd": ["skill", "skill_all"]}, search.index_name(kb.tenant_id), dataset_id)
 
     KnowledgebaseService.update_by_id(kb.id, {task_id_field: "", task_finish_at_field: None})
     return True, {}
@@ -990,8 +1000,7 @@ async def search(dataset_id: str, tenant_id: str, req: dict):
         use_kg = search_config.get("use_kg", use_kg)
         langs = search_config.get("cross_languages", langs)
         logging.debug(
-            "Dataset search loaded Search config: search_id=%s dataset_id=%s "
-            "vector_similarity_weight=%s full_text_weight=%s similarity_threshold=%s top_k=%s",
+            "Dataset search loaded Search config: search_id=%s dataset_id=%s vector_similarity_weight=%s full_text_weight=%s similarity_threshold=%s top_k=%s",
             search_id,
             dataset_id,
             vector_similarity_weight,
@@ -1145,11 +1154,15 @@ def check_embedding(dataset_id: str, tenant_id: str, req: dict):
         index_nm = search.index_name(tenant_id)
 
         res0 = docStoreConn.search(
-            select_fields=[], highlight_fields=[],
+            select_fields=[],
+            highlight_fields=[],
             condition={"kb_id": kb_id, "available_int": 1},
-            match_expressions=[], order_by=OrderByExpr(),
-            offset=0, limit=1,
-            index_names=index_nm, knowledgebase_ids=[kb_id],
+            match_expressions=[],
+            order_by=OrderByExpr(),
+            offset=0,
+            limit=1,
+            index_names=index_nm,
+            knowledgebase_ids=[kb_id],
         )
         total = docStoreConn.get_total(res0)
         if total <= 0:
@@ -1164,9 +1177,12 @@ def check_embedding(dataset_id: str, tenant_id: str, req: dict):
                 select_fields=list(base_fields),
                 highlight_fields=[],
                 condition={"kb_id": kb_id, "available_int": 1},
-                match_expressions=[], order_by=OrderByExpr(),
-                offset=off, limit=1,
-                index_names=index_nm, knowledgebase_ids=[kb_id],
+                match_expressions=[],
+                order_by=OrderByExpr(),
+                offset=off,
+                limit=1,
+                index_names=index_nm,
+                knowledgebase_ids=[kb_id],
             )
             ids = docStoreConn.get_doc_ids(res1)
             if not ids:
@@ -1177,20 +1193,22 @@ def check_embedding(dataset_id: str, tenant_id: str, req: dict):
             vec_field = _guess_vec_field(full_doc)
             vec = _as_float_vec(full_doc.get(vec_field))
 
-            out.append({
-                "chunk_id": cid,
-                "kb_id": kb_id,
-                "doc_id": full_doc.get("doc_id"),
-                "doc_name": full_doc.get("docnm_kwd"),
-                "vector_field": vec_field,
-                "vector_dim": len(vec),
-                "vector": vec,
-                "page_num_int": full_doc.get("page_num_int"),
-                "position_int": full_doc.get("position_int"),
-                "top_int": full_doc.get("top_int"),
-                "content_with_weight": full_doc.get("content_with_weight") or "",
-                "question_kwd": full_doc.get("question_kwd") or [],
-            })
+            out.append(
+                {
+                    "chunk_id": cid,
+                    "kb_id": kb_id,
+                    "doc_id": full_doc.get("doc_id"),
+                    "doc_name": full_doc.get("docnm_kwd"),
+                    "vector_field": vec_field,
+                    "vector_dim": len(vec),
+                    "vector": vec,
+                    "page_num_int": full_doc.get("page_num_int"),
+                    "position_int": full_doc.get("position_int"),
+                    "top_int": full_doc.get("top_int"),
+                    "content_with_weight": full_doc.get("content_with_weight") or "",
+                    "question_kwd": full_doc.get("question_kwd") or [],
+                }
+            )
         return out
 
     def _clean(s: str):
@@ -1240,9 +1258,7 @@ def check_embedding(dataset_id: str, tenant_id: str, req: dict):
 
         try:
             v, _ = emb_mdl.encode([title, txt_in])
-            assert len(v[1]) == len(ck["vector"]), (
-                f"The dimension ({len(v[1])}) of given embedding model is different from the original ({len(ck['vector'])})"
-            )
+            assert len(v[1]) == len(ck["vector"]), f"The dimension ({len(v[1])}) of given embedding model is different from the original ({len(ck['vector'])})"
             sim_content = _cos_sim(v[1], ck["vector"])
             title_w = 0.1
             qv_mix = title_w * v[0] + (1 - title_w) * v[1]
@@ -1256,14 +1272,16 @@ def check_embedding(dataset_id: str, tenant_id: str, req: dict):
             return False, f"Embedding failure. {e}"
 
         eff_sims.append(sim)
-        results.append({
-            "chunk_id": ck["chunk_id"],
-            "doc_id": ck["doc_id"],
-            "doc_name": ck["doc_name"],
-            "vector_field": ck["vector_field"],
-            "vector_dim": ck["vector_dim"],
-            "cos_sim": round(sim, 6),
-        })
+        results.append(
+            {
+                "chunk_id": ck["chunk_id"],
+                "doc_id": ck["doc_id"],
+                "doc_name": ck["doc_name"],
+                "vector_field": ck["vector_field"],
+                "vector_dim": ck["vector_dim"],
+                "cos_sim": round(sim, 6),
+            }
+        )
 
     summary = {
         "kb_id": dataset_id,
@@ -1284,7 +1302,11 @@ def check_embedding(dataset_id: str, tenant_id: str, req: dict):
         logging.info("check_embedding: dataset=%s compatible avg_cos_sim=%s valid=%d", dataset_id, summary["avg_cos_sim"], len(eff_sims))
         return True, data
     logging.warning("check_embedding: dataset=%s not_effective avg_cos_sim=%s valid=%d", dataset_id, summary["avg_cos_sim"], len(eff_sims))
-    return "not_effective", {"code": RetCode.NOT_EFFECTIVE, "message": "Embedding model switch failed: the average similarity between old and new vectors is below 0.9, indicating incompatible vector spaces.", "data": data}
+    return "not_effective", {
+        "code": RetCode.NOT_EFFECTIVE,
+        "message": "Embedding model switch failed: the average similarity between old and new vectors is below 0.9, indicating incompatible vector spaces.",
+        "data": data,
+    }
 
 
 async def search_datasets(tenant_id: str, req: dict):
@@ -1359,8 +1381,7 @@ async def search_datasets(tenant_id: str, req: dict):
         use_kg = search_config.get("use_kg", use_kg)
         langs = search_config.get("cross_languages", langs)
         logging.debug(
-            "Dataset search loaded Search config: search_id=%s dataset_ids=%s "
-            "vector_similarity_weight=%s full_text_weight=%s similarity_threshold=%s top_k=%s",
+            "Dataset search loaded Search config: search_id=%s dataset_ids=%s vector_similarity_weight=%s full_text_weight=%s similarity_threshold=%s top_k=%s",
             search_id,
             kb_ids,
             vector_similarity_weight,
@@ -1382,8 +1403,7 @@ async def search_datasets(tenant_id: str, req: dict):
             chat_mdl = LLMBundle(tenant_id, chat_model_config)
 
     if meta_data_filter:
-        logging.debug("Metadata filter applied: %s, question length: %d, chat_mdl=%s",
-                      meta_data_filter, len(question), 'None' if chat_mdl is None else 'configured')
+        logging.debug("Metadata filter applied: %s, question length: %d, chat_mdl=%s", meta_data_filter, len(question), "None" if chat_mdl is None else "configured")
         local_doc_ids = await apply_meta_data_filter(
             meta_data_filter,
             None,
@@ -1457,3 +1477,1024 @@ async def search_datasets(tenant_id: str, req: dict):
     ranks["labels"] = labels
 
     return True, ranks
+
+
+# ---------------------------------------------------------------------------
+# Artifact (knowledge compilation) page surface
+#
+# These three helpers power the dataset-level "Artifact" tab. They query rows
+# with ``compile_kwd="artifact_page"`` written by TaskHandler's
+# ``_persist_wiki_pages_to_es``. The schema fields they rely on are:
+#   slug_kwd, title_kwd, page_type_kwd, content_with_weight,
+#   summary_with_weight, entity_names_kwd, outlinks_kwd, outlinks_int,
+#   related_kb_pages_kwd, source_chunk_ids, source_doc_ids
+# ---------------------------------------------------------------------------
+
+_WIKI_COMPILE_KWD = "artifact_page"  # ``compile_kwd`` value the artifact-page queries below filter on
+_SKILL_COMPILE_KWD = "skill"  # NOTE(review): presumably the ``compile_kwd`` of skill rows; usage not visible here
+_SKILL_ALL_COMPILE_KWD = "skill_all"  # NOTE(review): presumably an aggregate skill marker; confirm against writers
+
+
+def _compiled_index_or_none(tenant_id: str, kb_id: str):
+    """Resolve the tenant's doc-store index for ``kb_id``.
+
+    Returns ``(index_name, search_module)`` if the index exists and ``None``
+    otherwise, so callers can answer "empty" before any index was created.
+    """
+    from rag.nlp import search as nlp_search
+
+    tenant_index = nlp_search.index_name(tenant_id)
+    return (tenant_index, nlp_search) if settings.docStoreConn.index_exist(tenant_index, kb_id) else None
+
+
+def _wiki_index_or_none(tenant_id: str, kb_id: str):  # artifact-page alias; delegates unchanged
+    return _compiled_index_or_none(tenant_id, kb_id)
+
+
+def _skill_index_or_none(tenant_id: str, kb_id: str):  # skill alias; delegates unchanged
+    return _compiled_index_or_none(tenant_id, kb_id)
+
+
+async def has_any_wiki(dataset_id: str, tenant_id: str) -> tuple[bool, dict | str]:
+    """Fast existence probe for the sidebar tab visibility check.
+
+    Returns ``(True, {"has": bool})`` on success or ``(False, str)`` on
+    auth failure. Runs a ``limit=1`` search and reads only the total.
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+    # The index is resolved from the dataset row's own tenant_id, not the caller-supplied one.
+    pack = _wiki_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, {"has": False}
+    index_nm, _ = pack
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    try:
+        res = settings.docStoreConn.search(
+            select_fields=["id"],
+            highlight_fields=[],
+            condition={"compile_kwd": [_WIKI_COMPILE_KWD]},
+            match_expressions=[],
+            order_by=OrderByExpr(),
+            offset=0,
+            limit=1,
+            index_names=index_nm,
+            knowledgebase_ids=[dataset_id],
+        )
+    except Exception:  # best-effort probe: a failed search is logged and reported as "no pages"
+        logging.exception("has_any_wiki: docStore search failed for kb=%s", dataset_id)
+        return True, {"has": False}
+    # Only the hit count is consulted; the returned row itself is never read.
+    total = settings.docStoreConn.get_total(res)
+    return True, {"has": bool(total)}
+
+
+async def list_wiki_pages(
+    dataset_id: str,
+    tenant_id: str,
+    page: int = 1,
+    page_size: int = 200,
+    page_type: str | None = None,
+) -> tuple[bool, dict | str]:
+    """List artifact pages for the left-hand 2-column list.
+
+    Returns ``(True, {"total", "items": [{slug, title, page_type, summary}, ...]})``
+    or ``(False, str)`` on auth failure. Ordering: ``outlinks_int`` descending,
+    then ``title_kwd`` ascending; ``page_size`` is clamped to 1..1000.
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    pack = _wiki_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, {"total": 0, "items": []}
+    index_nm, _ = pack
+
+    from common.doc_store.doc_store_base import OrderByExpr
+    # Normalise paging: page >= 1 and 1 <= page_size <= 1000.
+    page = max(1, int(page or 1))
+    page_size = max(1, min(int(page_size or 200), 1000))
+    offset = (page - 1) * page_size
+
+    condition: dict = {"compile_kwd": [_WIKI_COMPILE_KWD]}
+    if page_type:
+        condition["page_type_kwd"] = [page_type]
+
+    order_by = OrderByExpr()
+    try:
+        # Most-connected pages first: outlinks_int = len(outlinks_kwd) is
+        # written by the persistence layer for exactly this query.
+        order_by.desc("outlinks_int").asc("title_kwd")
+    except Exception:
+        # OrderByExpr API differs across doc-store backends; degrade to
+        # default order rather than 500.
+        order_by = OrderByExpr()
+
+    select_fields = [
+        "id",
+        "slug_kwd",
+        "title_kwd",
+        "page_type_kwd",
+        "outlinks_int",
+        "summary_with_weight",
+    ]
+    try:
+        res = settings.docStoreConn.search(
+            select_fields=select_fields,
+            highlight_fields=[],
+            condition=condition,
+            match_expressions=[],
+            order_by=order_by,
+            offset=offset,
+            limit=page_size,
+            index_names=index_nm,
+            knowledgebase_ids=[dataset_id],
+        )
+        field_map = settings.docStoreConn.get_fields(res, select_fields)
+    except Exception:  # best-effort: a failed search is logged and returned as an empty page
+        logging.exception("list_wiki_pages: docStore search failed for kb=%s", dataset_id)
+        return True, {"total": 0, "items": []}
+    # ``total`` is the store's hit count; rows lacking a non-empty string slug are skipped below.
+    total = settings.docStoreConn.get_total(res)
+    items = []
+    for row in (field_map or {}).values():
+        slug = row.get("slug_kwd")
+        if not isinstance(slug, str) or not slug:
+            continue
+        items.append(
+            {
+                "slug": slug,
+                "title": row.get("title_kwd") or slug,
+                "page_type": row.get("page_type_kwd") or "concept",
+                "summary": row.get("summary_with_weight") or "",
+            }
+        )
+
+    return True, {"total": int(total or 0), "items": items}
+
+
+async def get_wiki_page(
+    dataset_id: str,
+    tenant_id: str,
+    page_type: str,
+    slug: str,
+) -> tuple[bool, dict | str | None]:
+    """Fetch a single artifact page for the right-hand markdown viewer.
+
+    ``slug`` is the tail after ``<page_type>/`` — i.e. the URL component
+    that came from the markdown link ``artifact/<kb_id>/<page_type>/<slug>``.
+    The stored ``slug_kwd`` is the full ``<page_type>/<slug>`` form, so it is
+    rebuilt before the lookup; a ``slug`` already containing "/" is used as-is.
+    Returns ``(True, page_dict)``, ``(True, None)`` when no row matches, the
+    index is missing or the search fails, or ``(False, str)`` on auth failure.
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    pack = _wiki_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, None
+    index_nm, _ = pack
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    full_slug = f"{page_type}/{slug}" if "/" not in slug else slug
+    select_fields = [
+        "id",
+        "slug_kwd",
+        "title_kwd",
+        "page_type_kwd",
+        "content_with_weight",
+        "summary_with_weight",
+        "entity_names_kwd",
+        "outlinks_kwd",
+        "related_kb_pages_kwd",
+        "source_chunk_ids",
+        "source_doc_ids",
+    ]
+    try:
+        res = settings.docStoreConn.search(
+            select_fields=select_fields,
+            highlight_fields=[],
+            condition={
+                "compile_kwd": [_WIKI_COMPILE_KWD],
+                "page_type_kwd": [page_type],
+                "slug_kwd": [full_slug],
+            },
+            match_expressions=[],
+            order_by=OrderByExpr(),
+            offset=0,
+            limit=1,
+            index_names=index_nm,
+            knowledgebase_ids=[dataset_id],
+        )
+        field_map = settings.docStoreConn.get_fields(res, select_fields)
+    except Exception:  # best-effort: a failed search is logged and reported as "no page"
+        logging.exception(
+            "get_wiki_page: search failed for kb=%s slug=%s",
+            dataset_id,
+            full_slug,
+        )
+        return True, None
+
+    if not field_map:
+        return True, None
+    # The search used limit=1, so the first row of the map is the page to return.
+    _, row = next(iter(field_map.items()))
+    content_md = row.get("content_with_weight") or ""
+    summary = row.get("summary_with_weight") or ""
+    return True, {
+        "slug": row.get("slug_kwd") or full_slug,
+        "title": row.get("title_kwd") or full_slug,
+        "page_type": row.get("page_type_kwd") or page_type,
+        "content_md_rendered": content_md,
+        "summary": summary,
+        "entity_names": row.get("entity_names_kwd") or [],
+        "outlinks": row.get("outlinks_kwd") or [],
+        "related_kb_pages": row.get("related_kb_pages_kwd") or [],
+        "source_chunk_ids": row.get("source_chunk_ids") or [],
+        "source_doc_ids": row.get("source_doc_ids") or [],
+    }
+
+
+async def has_any_skill(dataset_id: str, tenant_id: str):
+    """Fast existence probe for the dataset Skills sidebar entry."""
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    pack = _skill_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, {"has": False}
+    index_nm, _ = pack
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    try:
+        res = settings.docStoreConn.search(
+            select_fields=["id"],
+            highlight_fields=[],
+            condition={"compile_kwd": [_SKILL_ALL_COMPILE_KWD]},
+            match_expressions=[],
+            order_by=OrderByExpr(),
+            offset=0,
+            limit=1,
+            index_names=index_nm,
+            knowledgebase_ids=[dataset_id],
+        )
+    except Exception:
+        logging.exception("has_any_skill: docStore search failed for kb=%s", dataset_id)
+        return True, {"has": False}
+
+    total = settings.docStoreConn.get_total(res)
+    return True, {"has": bool(total)}
+
+
+async def get_skill_tree(dataset_id: str, tenant_id: str):
+    """Fetch the one-shot skill tree row: ``(True, dict)``, ``(True, None)`` if absent or the search fails, ``(False, msg)`` if unauthorized."""
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    pack = _skill_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, None
+    index_nm, _ = pack
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    select_fields = ["id", "kb_id", "doc_id", "compile_kwd", "skill_with_weight"]
+    try:
+        res = settings.docStoreConn.search(
+            select_fields=select_fields,
+            highlight_fields=[],
+            condition={"compile_kwd": [_SKILL_ALL_COMPILE_KWD]},
+            match_expressions=[],
+            order_by=OrderByExpr(),
+            offset=0,
+            limit=1,
+            index_names=index_nm,
+            knowledgebase_ids=[dataset_id],
+        )
+        field_map = settings.docStoreConn.get_fields(res, select_fields)
+    except Exception:
+        logging.exception("get_skill_tree: docStore search failed for kb=%s", dataset_id)
+        return True, None
+
+    if not field_map:
+        return True, None
+    _, row = next(iter(field_map.items()))
+    # A missing/empty ``skill_with_weight`` would make json.loads raise; parse "null" instead so the ``or []`` fallback applies.
+    return True, {
+        "id": row.get("id"),
+        "kb_id": row.get("kb_id") or dataset_id,
+        "doc_id": row.get("doc_id") or dataset_id,
+        "compile_kwd": row.get("compile_kwd") or _SKILL_ALL_COMPILE_KWD,
+        "skill_with_weight": json.loads(row.get("skill_with_weight") or "null") or [],
+    }
+
+
+async def get_skill_page(dataset_id: str, tenant_id: str, skill_kwd: str):
+    """Fetch the full markdown body for a single skill node."""
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    pack = _skill_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, None
+    index_nm, _ = pack
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    select_fields = [
+        "id",
+        "kb_id",
+        "doc_id",
+        "compile_kwd",
+        "skill_kwd",
+        "depth_int",
+        "children_kwd",
+        "source_doc_ids",
+        "md_with_weight",
+    ]
+    try:
+        res = settings.docStoreConn.search(
+            select_fields=select_fields,
+            highlight_fields=[],
+            condition={
+                "compile_kwd": [_SKILL_COMPILE_KWD],
+                "skill_kwd": [skill_kwd],
+            },
+            match_expressions=[],
+            order_by=OrderByExpr(),
+            offset=0,
+            limit=1,
+            index_names=index_nm,
+            knowledgebase_ids=[dataset_id],
+        )
+        field_map = settings.docStoreConn.get_fields(res, select_fields)
+    except Exception:
+        logging.exception(
+            "get_skill_page: docStore search failed for kb=%s skill=%s",
+            dataset_id,
+            skill_kwd,
+        )
+        return True, None
+
+    if not field_map:
+        return True, None
+
+    _, row = next(iter(field_map.items()))
+    return True, {
+        "id": row.get("id"),
+        "kb_id": row.get("kb_id") or dataset_id,
+        "doc_id": row.get("doc_id") or dataset_id,
+        "compile_kwd": row.get("compile_kwd") or _SKILL_COMPILE_KWD,
+        "skill_kwd": row.get("skill_kwd") or skill_kwd,
+        "depth_int": row.get("depth_int") or 0,
+        "children_kwd": row.get("children_kwd") or [],
+        "source_doc_ids": row.get("source_doc_ids") or [],
+        "md_with_weight": row.get("md_with_weight") or "",
+    }
+
+
+async def update_wiki_page(
+    dataset_id: str,
+    tenant_id: str,
+    page_type: str,
+    slug: str,
+    content_md: str,
+    *,
+    user_id: str | None = None,
+    title: str | None = None,
+    comments: str | None = None,
+):
+    """Edit an artifact page in place from the canvas double-click dialog.
+
+    Body must contain ``content_md`` — the (possibly edited) page markdown.
+    We run it through ``_wiki_transform_links`` so any newly typed
+    ``[[slug]]`` references upgrade to clickable artifact URLs (and pre-rendered
+    links pass through unchanged — the transform is idempotent on already-
+    rendered markdown). ``summary`` is re-derived from the new rendered text.
+    ``outlinks_kwd`` is rebuilt from the link-transform pass.
+
+    Per the v1 contract, only the page row is updated. The canvas
+    ``artifact_page_graph`` / ``artifact_entity`` / ``artifact_relation``
+    rows stay stale until the next full artifact compile.
+
+    Side effect: when the rendered post-save markdown differs from the
+    prior stored content, one ``artifact_commit`` row is recorded
+    (git-style audit). No-op saves are silently skipped — empty diff,
+    no row.
+
+    Returns ``(True, page_dict)`` mirroring ``get_wiki_page``, or
+    ``(True, None)`` when the row is missing, or
+    ``(False, message)`` on authorization failure.
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    pack = _wiki_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, None
+    index_nm, _ = pack
+
+    from rag.advanced_rag.knowlege_compile.wiki import (
+        _wiki_transform_links,
+        _wiki_extract_summary,
+    )
+    from api.db.services.file_commit_service import FileCommitService
+
+    full_slug = f"{page_type}/{slug}" if "/" not in slug else slug
+
+    # Capture the pre-edit rendered content + the row id. Both come from
+    # the same search: the row id is the dict key returned by
+    # docStoreConn.get_fields. We need the id specifically because the
+    # generic non-id update path (ESConnection.update slow branch) routes
+    # through a Painless script that scrubs newlines / single quotes /
+    # backslash escapes from string values — which would collapse every
+    # paragraph in the saved markdown to one line. Passing the row id in
+    # ``condition`` selects the fast partial-update branch which preserves
+    # the JSON value verbatim.
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    row_id: str | None = None
+    content_before = ""
+    try:
+        res = settings.docStoreConn.search(
+            select_fields=["id", "content_with_weight"],
+            highlight_fields=[],
+            condition={
+                "compile_kwd": [_WIKI_COMPILE_KWD],
+                "page_type_kwd": [page_type],
+                "slug_kwd": [full_slug],
+            },
+            match_expressions=[],
+            order_by=OrderByExpr(),
+            offset=0,
+            limit=1,
+            index_names=index_nm,
+            knowledgebase_ids=[dataset_id],
+        )
+        field_map = settings.docStoreConn.get_fields(
+            res,
+            ["id", "content_with_weight"],
+        )
+        if field_map:
+            row_id, row = next(iter(field_map.items()))
+            content_before = row.get("content_with_weight") or ""
+    except Exception:
+        logging.exception(
+            "update_wiki_page: lookup failed for kb=%s slug=%s",
+            dataset_id,
+            full_slug,
+        )
+    if not row_id:
+        return True, None
+
+    content_md = content_md or ""
+    rendered, outlinks = _wiki_transform_links(content_md, dataset_id)
+    summary = _wiki_extract_summary(rendered) or ""
+
+    try:
+        # id-keyed condition forces the partial-update fast path — no
+        # newline scrubbing. See the comment above the lookup for the
+        # full reasoning.
+        ok = settings.docStoreConn.update(
+            {"id": row_id},
+            {
+                "content_with_weight": rendered,
+                "summary_with_weight": summary,
+                "outlinks_kwd": list(outlinks),
+            },
+            index_nm,
+            dataset_id,
+        )
+    except Exception:
+        logging.exception(
+            "update_wiki_page: docStore update failed for kb=%s slug=%s",
+            dataset_id,
+            full_slug,
+        )
+        return True, None
+
+    if not ok:
+        return True, None
+
+    # Record a file_commit row on every real change. ``record_page_edit``
+    # returns None for empty-diff saves, which we silently swallow.
+    try:
+        FileCommitService.record_page_edit(
+            tenant_id=tenant_id,
+            kb_id=dataset_id,
+            page_type=page_type,
+            slug=full_slug,
+            content_before=content_before,
+            content_after=rendered,
+            title=title,
+            comments=comments,
+            user_id=user_id,
+        )
+    except Exception:
+        logging.exception(
+            "update_wiki_page: file_commit record failed for kb=%s slug=%s",
+            dataset_id,
+            full_slug,
+        )
+
+    # Re-read the row so the dialog gets the canonical post-update state.
+    return await get_wiki_page(dataset_id, tenant_id, page_type, slug)
+
+
+# ``list_wiki_commits`` / ``get_wiki_commit`` retired — the two
+# ``/datasets/<id>/artifacts/.../commits`` REST endpoints now go through
+# the generic file-commit routes (``/datasets/<id>/commits`` with an
+# optional ``?slug=`` filter), backed by
+# :meth:`FileCommitService.list_page_commits` and
+# :meth:`FileCommitService.get_page_commit_detail`.
+
+
+# The seven row types the artifact pipeline writes. Listed in dependency
+# order so partial failures of earlier deletes don't leave behind state
+# that downstream phases would silently reuse. ``artifact_page_graph``
+# is the materialized canvas graph derived from the refined pages.
+# NOTE(review): it is not listed below, so ``clear_wiki`` skips it — confirm.
+_WIKI_COMPILE_KWDS = (
+    "artifact_map_extract",
+    "artifact_reduce_result",
+    "artifact_compilation_plan",
+    "artifact_page_draft",
+    "artifact_page",
+    "artifact_entity",
+    "artifact_relation",
+)
+
+# Tunables for the incremental graph loader. See ``get_wiki_graph``.
+_WIKI_GRAPH_ENTITY_KWD = "artifact_entity"
+_WIKI_GRAPH_RELATION_KWD = "artifact_relation"
+_WIKI_GRAPH_ENTITY_PAGE_SIZE = 32
+_WIKI_GRAPH_MAX_LOADING_ENTITY = 128
+
+
+def _wiki_entity_payload(row: dict) -> dict | None:
+    """Project one ``artifact_entity`` ES row onto the canvas entity shape.
+
+    The row stores the canvas payload pre-built as JSON in
+    ``content_with_weight``; we parse it back and overlay the columns
+    the writer set independently (weight_int, source_chunk_ids) so the
+    frontend gets the authoritative numbers regardless of any
+    JSON-vs-column drift.
+    """
+    raw = row.get("content_with_weight") or ""
+    payload: dict = {}
+    if isinstance(raw, str) and raw.strip():
+        try:
+            parsed = json.loads(raw)
+            if isinstance(parsed, dict):
+                payload = parsed
+        except Exception:
+            pass
+    slug = payload.get("slug") or row.get("slug_kwd")
+    if not isinstance(slug, str) or not slug:
+        return None
+    out = {
+        "slug": slug,
+        "name": payload.get("name") or slug,
+        "aliases": list(payload.get("aliases") or []),
+        "description": payload.get("description") or "",
+        "type": payload.get("type") or "concept",
+        "weight": int(row.get("weight_int") or payload.get("weight") or 0),
+    }
+    source_chunk_ids = row.get("source_chunk_ids") or []
+    if isinstance(source_chunk_ids, list):
+        out["source_chunk_ids"] = [c for c in source_chunk_ids if isinstance(c, str) and c]
+    return out
+
+
+def _wiki_relation_payload(row: dict) -> dict | None:
+    raw = row.get("content_with_weight") or ""
+    payload: dict = {}
+    if isinstance(raw, str) and raw.strip():
+        try:
+            parsed = json.loads(raw)
+            if isinstance(parsed, dict):
+                payload = parsed
+        except Exception:
+            pass
+    src = payload.get("from") or row.get("from_kwd")
+    tgt = payload.get("to") or row.get("to_kwd")
+    if not isinstance(src, str) or not src or not isinstance(tgt, str) or not tgt:
+        return None
+    return {"from": src, "to": tgt}
+
+
+async def _wiki_search_entity_page(
+    index_nm,
+    dataset_id: str,
+    offset: int,
+    limit: int,
+):
+    """One page of artifact_entity rows, ordered by weight_int DESC."""
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    order_by = OrderByExpr()
+    try:
+        order_by.desc("weight_int")
+    except Exception:
+        order_by = OrderByExpr()
+
+    select_fields = [
+        "id",
+        "slug_kwd",
+        "weight_int",
+        "source_chunk_ids",
+        "content_with_weight",
+    ]
+    res = await thread_pool_exec(
+        settings.docStoreConn.search,
+        select_fields,
+        [],
+        {"compile_kwd": [_WIKI_GRAPH_ENTITY_KWD]},
+        [],
+        order_by,
+        offset,
+        limit,
+        index_nm,
+        [dataset_id],
+    )
+    return settings.docStoreConn.get_fields(res, select_fields)
+
+
+async def _wiki_search_entities_by_slugs(
+    index_nm,
+    dataset_id: str,
+    slugs: list[str],
+):
+    """Fetch entity rows whose ``slug_kwd`` is in ``slugs``. Unordered."""
+    if not slugs:
+        return {}
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    select_fields = [
+        "id",
+        "slug_kwd",
+        "weight_int",
+        "source_chunk_ids",
+        "content_with_weight",
+    ]
+    res = await thread_pool_exec(
+        settings.docStoreConn.search,
+        select_fields,
+        [],
+        {
+            "compile_kwd": [_WIKI_GRAPH_ENTITY_KWD],
+            "slug_kwd": list(slugs),
+        },
+        [],
+        OrderByExpr(),
+        0,
+        max(len(slugs), 1),
+        index_nm,
+        [dataset_id],
+    )
+    return settings.docStoreConn.get_fields(res, select_fields)
+
+
+async def _wiki_search_relations_from(
+    index_nm,
+    dataset_id: str,
+    from_slugs: list[str],
+):
+    """Fetch all relation rows with ``from_kwd`` in ``from_slugs``."""
+    if not from_slugs:
+        return {}
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    select_fields = ["id", "from_kwd", "to_kwd", "content_with_weight"]
+    # Generous upper bound: relations are short; bulk-pull all matching at
+    # once rather than paging.
+    res = await thread_pool_exec(
+        settings.docStoreConn.search,
+        select_fields,
+        [],
+        {
+            "compile_kwd": [_WIKI_GRAPH_RELATION_KWD],
+            "from_kwd": list(from_slugs),
+        },
+        [],
+        OrderByExpr(),
+        0,
+        10000,
+        index_nm,
+        [dataset_id],
+    )
+    return settings.docStoreConn.get_fields(res, select_fields)
+
+
+async def get_wiki_graph(
+    dataset_id: str,
+    tenant_id: str,
+    node: str | None = None,
+):
+    """Load the canvas graph payload incrementally from per-row data.
+
+    Two modes:
+
+    * **Overview** (``node`` is None) — paginate ``artifact_entity`` rows
+      ordered by ``weight_int DESC`` in pages of
+      ``_WIKI_GRAPH_ENTITY_PAGE_SIZE``. Each page's unseen entities are
+      added until the entity **count** reaches
+      ``_WIKI_GRAPH_MAX_LOADING_ENTITY`` (the cumulative-weight budget check
+      is currently commented out). Pull ``artifact_relation`` rows whose
+      ``from_kwd`` is in the just-added entities; pull the ``to`` targets
+      we haven't seen yet (they count toward the same cap). Stop once the cap
+      is hit, a page is empty or short, or a page adds no new entity.
+
+    * **Click** (``node`` is a slug) — load the centre entity (always
+      included), pull every ``artifact_relation`` with ``from_kwd=node``,
+      then pull the ``to`` entities. Capped at
+      ``_WIKI_GRAPH_MAX_LOADING_ENTITY`` for hub-node safety.
+
+    Returns ``(True, {"entities": [...], "relations": [...]})`` shaped
+    exactly as the frontend ``ForceGraph`` adapter consumes, or
+    ``(False, message)`` on authorization failure.
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    empty = {"entities": [], "relations": []}
+
+    pack = _wiki_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, empty
+    index_nm, _ = pack
+
+    cap = _WIKI_GRAPH_MAX_LOADING_ENTITY
+    page_size = _WIKI_GRAPH_ENTITY_PAGE_SIZE
+
+    # ``entities`` preserves first-seen order so the canvas paints the
+    # heaviest-weighted nodes first (or, in click mode, the centre node
+    # first). The dict-keyed-by-slug structure also deduplicates the
+    # "B is a to-target AND later a high-weight entity in its own right"
+    # case cheaply.
+    entities: dict[str, dict] = {}
+    relations: list[dict] = []
+    relation_keys: set[tuple[str, str]] = set()
+
+    def _add_entity(payload: dict) -> bool:
+        slug = payload.get("slug")
+        if not isinstance(slug, str) or not slug or slug in entities:
+            return False
+        entities[slug] = payload
+        return True
+
+    def _add_relation(payload: dict) -> None:
+        key = (payload["from"], payload["to"])
+        if key in relation_keys:
+            return
+        relation_keys.add(key)
+        relations.append(payload)
+
+    # ---- Flow B — click expansion centred on ``node``. ----------------
+    if isinstance(node, str) and node.strip():
+        center_slug = node.strip()
+        try:
+            field_map = await _wiki_search_entities_by_slugs(
+                index_nm,
+                dataset_id,
+                [center_slug],
+            )
+        except Exception:
+            logging.exception(
+                "get_wiki_graph: centre lookup failed kb=%s node=%s",
+                dataset_id,
+                center_slug,
+            )
+            return True, empty
+
+        for row in (field_map or {}).values():
+            payload = _wiki_entity_payload(row)
+            if payload:
+                _add_entity(payload)
+                break
+
+        if center_slug not in entities:
+            # Caller pointed at a slug that doesn't exist; return empty
+            # rather than a confusing partial graph.
+            return True, empty
+
+        # Outgoing edges from the centre, capped by MAX_LOADING_ENTITY.
+        try:
+            rel_map = await _wiki_search_relations_from(
+                index_nm,
+                dataset_id,
+                [center_slug],
+            )
+        except Exception:
+            logging.exception(
+                "get_wiki_graph: relation lookup failed kb=%s node=%s",
+                dataset_id,
+                center_slug,
+            )
+            return True, {"entities": list(entities.values()), "relations": []}
+
+        to_slugs: list[str] = []
+        for row in (rel_map or {}).values():
+            payload = _wiki_relation_payload(row)
+            if payload is None:
+                continue
+            if payload["from"] != center_slug:
+                continue
+            # Hub-node cap: stop accepting more relations once the
+            # to-target set would push us over the entity budget.
+            if payload["to"] not in entities and len(entities) + len(to_slugs) >= cap:
+                continue
+            _add_relation(payload)
+            if payload["to"] != center_slug and payload["to"] not in entities:
+                if payload["to"] not in to_slugs:
+                    to_slugs.append(payload["to"])
+
+        if to_slugs:
+            try:
+                to_map = await _wiki_search_entities_by_slugs(
+                    index_nm,
+                    dataset_id,
+                    to_slugs,
+                )
+            except Exception:
+                logging.exception(
+                    "get_wiki_graph: neighbour lookup failed kb=%s node=%s",
+                    dataset_id,
+                    center_slug,
+                )
+                to_map = {}
+            for row in (to_map or {}).values():
+                payload = _wiki_entity_payload(row)
+                if payload and len(entities) < cap:
+                    _add_entity(payload)
+
+        return True, {
+            "entities": list(entities.values()),
+            "relations": relations,
+        }
+
+    # ---- Flow A — overview, top-weight paged with cumulative budget. ---
+    cumulative_weight = 0
+    page = 1
+    while len(entities) < cap:
+        offset = (page - 1) * page_size
+        try:
+            field_map = await _wiki_search_entity_page(
+                index_nm,
+                dataset_id,
+                offset,
+                page_size,
+            )
+        except Exception:
+            logging.exception(
+                "get_wiki_graph: entity page fetch failed kb=%s page=%d",
+                dataset_id,
+                page,
+            )
+            break
+        if not field_map:
+            break
+
+        # Preserve weight_int DESC order from ES. Iteration over a dict
+        # produced by get_fields keeps insertion order; ES returned them
+        # sorted, so we can rely on that.
+        page_rows = list(field_map.values())
+
+        e_sub: list[dict] = []
+        for row in page_rows:
+            payload = _wiki_entity_payload(row)
+            if payload is None:
+                continue
+            if payload["slug"] in entities:
+                continue
+            w = max(0, int(payload.get("weight") or 0))
+            # Step 2: the spec called for a cumulative-weight budget, but the
+            # check below is disabled, so ``cumulative_weight`` is only
+            # accumulated and never read. The effective limit is the entity
+            # count compared against ``cap`` after the append.
+            # Disabled budget check, kept for reference:
+            # if cumulative_weight + w > cap and len(entities) + len(e_sub) > 0:
+            #    break
+            cumulative_weight += w
+            e_sub.append(payload)
+            if len(entities) + len(e_sub) >= cap:
+                break
+
+        if not e_sub:
+            break
+
+        for payload in e_sub:
+            _add_entity(payload)
+
+        # Step 3: relations originating in E_sub.
+        sub_slugs = [p["slug"] for p in e_sub]
+        try:
+            rel_map = await _wiki_search_relations_from(
+                index_nm,
+                dataset_id,
+                sub_slugs,
+            )
+        except Exception:
+            logging.exception(
+                "get_wiki_graph: relation page fetch failed kb=%s",
+                dataset_id,
+            )
+            rel_map = {}
+
+        missing_to: list[str] = []
+        for row in (rel_map or {}).values():
+            payload = _wiki_relation_payload(row)
+            if payload is None:
+                continue
+            _add_relation(payload)
+            if payload["to"] not in entities and payload["to"] not in missing_to:
+                missing_to.append(payload["to"])
+
+        # Step 4: hydrate the to-targets (they count toward the cap). NOTE(review): relations added above are kept even if the cap cuts their target — confirm the canvas tolerates that.
+        if missing_to:
+            try:
+                to_map = await _wiki_search_entities_by_slugs(
+                    index_nm,
+                    dataset_id,
+                    missing_to,
+                )
+            except Exception:
+                logging.exception(
+                    "get_wiki_graph: to-target hydrate failed kb=%s",
+                    dataset_id,
+                )
+                to_map = {}
+            for row in (to_map or {}).values():
+                if len(entities) >= cap:
+                    break
+                payload = _wiki_entity_payload(row)
+                if payload:
+                    _add_entity(payload)
+
+        # Step 5: page forward only if the cap allows another iteration.
+        if len(entities) >= cap or len(page_rows) < page_size:
+            break
+        page += 1
+
+    return True, {
+        "entities": list(entities.values()),
+        "relations": relations,
+    }
+
+
+async def clear_wiki(dataset_id: str, tenant_id: str):
+    """Wipe every artifact-related row from ES for this KB.
+
+    Issues one delete per ``compile_kwd`` in ``_WIKI_COMPILE_KWDS`` (MAP extracts,
+    REDUCE results, PLAN output, page drafts, the searchable artifact_page rows,
+    and the artifact_entity / artifact_relation rows). After this completes the
+    next "Artifact" run starts from a clean slate — no resume cache to
+    short-circuit MAP, no prior pages to reconcile against in PLAN.
+
+    Returns ``(True, {"deleted": {kwd: result}})`` — ``result`` is the backend's return value (``True``
+    if it returned None, ``False`` if that delete raised) — or ``(False, str)`` on auth failure.
+    """
+    if not KnowledgebaseService.accessible(dataset_id, tenant_id):
+        return False, "No authorization."
+    _, kb = KnowledgebaseService.get_by_id(dataset_id)
+
+    pack = _wiki_index_or_none(kb.tenant_id, dataset_id)
+    if pack is None:
+        return True, {"deleted": {}}
+    index_nm, _ = pack
+
+    deleted: dict[str, object] = {}
+    for kwd in _WIKI_COMPILE_KWDS:
+        try:
+            res = settings.docStoreConn.delete(
+                {"compile_kwd": kwd},
+                index_nm,
+                dataset_id,
+            )
+            # Different backends return different shapes (int count, dict,
+            # bool). Surface whatever we got so the caller can log it.
+            deleted[kwd] = res if res is not None else True
+        except Exception:
+            logging.exception(
+                "clear_wiki: delete failed for kwd=%s kb=%s",
+                kwd,
+                dataset_id,
+            )
+            deleted[kwd] = False
+
+    return True, {"deleted": deleted}
diff --git a/api/apps/services/document_api_service.py b/api/apps/services/document_api_service.py
index a80689c12a..d7b45e6218 100644
--- a/api/apps/services/document_api_service.py
+++ b/api/apps/services/document_api_service.py
@@ -61,6 +61,7 @@ def update_document_name_only(document_id, req_doc_name):
         )
     return None
 
+
 def update_chunk_method(req, doc, tenant_id):
     """
     Update chunk method only (without validation).
@@ -146,7 +147,7 @@ def reset_document_for_reparse(doc, tenant_id, parser_id=None, pipeline_id=None)
     return None
 
 
-def update_document_status_only(status:int, doc, kb):
+def update_document_status_only(status: int, doc, kb):
     """
     Update document status only (without validation).
 
@@ -165,13 +166,18 @@ def update_document_status_only(status:int, doc, kb):
         try:
             if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
                 return get_error_data_result(message="Database error (Document update)!")
-            settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
+            settings.docStoreConn.update(
+                {"doc_id": doc.id, "must_not": {"exists": "compile_kwd"}},
+                {"available_int": status},
+                search.index_name(kb.tenant_id),
+                doc.kb_id,
+            )
         except Exception as e:
             return server_error_response(e)
     return None
 
 
-def validate_document_update_fields(update_doc_req:UpdateDocumentReq, doc, req):
+def validate_document_update_fields(update_doc_req: UpdateDocumentReq, doc, req):
     """
     Validate document update fields in a single method.
 
@@ -269,7 +275,7 @@ def _process_key_mappings(doc):
     }
 
     # Handle both dict and model input
-    items = doc.to_dict().items() if hasattr(doc, 'to_dict') else doc.items()
+    items = doc.to_dict().items() if hasattr(doc, "to_dict") else doc.items()
 
     renamed_doc = {}
     for key, value in items:
diff --git a/api/db/__init__.py b/api/db/__init__.py
index ffcd8e7b3d..2468d59473 100644
--- a/api/db/__init__.py
+++ b/api/db/__init__.py
@@ -21,15 +21,15 @@ from common.constants import PipelineTaskType
 
 
 class UserTenantRole(StrEnum):
-    OWNER = 'owner'
-    ADMIN = 'admin'
-    NORMAL = 'normal'
-    INVITE = 'invite'
+    OWNER = "owner"
+    ADMIN = "admin"
+    NORMAL = "normal"
+    INVITE = "invite"
 
 
 class TenantPermission(StrEnum):
-    ME = 'me'
-    TEAM = 'team'
+    ME = "me"
+    TEAM = "team"
 
 
 class SerializedType(IntEnum):
@@ -38,14 +38,15 @@ class SerializedType(IntEnum):
 
 
 class FileType(StrEnum):
-    PDF = 'pdf'
-    DOC = 'doc'
-    VISUAL = 'visual'
-    AURAL = 'aural'
-    VIRTUAL = 'virtual'
-    FOLDER = 'folder'
+    PDF = "pdf"
+    DOC = "doc"
+    VISUAL = "visual"
+    AURAL = "aural"
+    VIRTUAL = "virtual"
+    FOLDER = "folder"
     OTHER = "other"
 
+
 VALID_FILE_TYPES = {FileType.PDF, FileType.DOC, FileType.VISUAL, FileType.AURAL, FileType.VIRTUAL, FileType.FOLDER, FileType.OTHER}
 
 
@@ -61,11 +62,31 @@ class CanvasCategory(StrEnum):
     DataFlow = "dataflow_canvas"
 
 
-VALID_PIPELINE_TASK_TYPES = {PipelineTaskType.PARSE, PipelineTaskType.DOWNLOAD, PipelineTaskType.RAPTOR, PipelineTaskType.GRAPH_RAG, PipelineTaskType.MINDMAP}
+VALID_PIPELINE_TASK_TYPES = {
+    PipelineTaskType.PARSE,
+    PipelineTaskType.DOWNLOAD,
+    PipelineTaskType.RAPTOR,
+    PipelineTaskType.GRAPH_RAG,
+    PipelineTaskType.MINDMAP,
+    PipelineTaskType.ARTIFACT,
+    PipelineTaskType.SKILL,
+}
 
 
-PIPELINE_SPECIAL_PROGRESS_FREEZE_TASK_TYPES = {PipelineTaskType.RAPTOR.lower(), PipelineTaskType.GRAPH_RAG.lower(), PipelineTaskType.MINDMAP.lower()}
+# KB-level fan-out task types: their Task row uses GRAPH_RAPTOR_FAKE_DOC_ID as a
+# sentinel doc_id, and ``task_executor.collect_task`` substitutes the first real
+# doc_id from ``msg["doc_ids"]`` before re-running ``TaskService.get_task`` so
+# the join through Document → Knowledgebase → Tenant resolves and tenant_id /
+# kb_id / language are hydrated onto the task dict. Add new fan-out task types
+# here or TaskContext will raise "Task must contain 'tenant_id'".
+PIPELINE_SPECIAL_PROGRESS_FREEZE_TASK_TYPES = {
+    PipelineTaskType.RAPTOR.lower(),
+    PipelineTaskType.GRAPH_RAG.lower(),
+    PipelineTaskType.MINDMAP.lower(),
+    PipelineTaskType.ARTIFACT.lower(),
+    PipelineTaskType.SKILL.lower(),
+}
 
 
-KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
-SKILLS_FOLDER_NAME="skills"
+KNOWLEDGEBASE_FOLDER_NAME = ".knowledgebase"
+SKILLS_FOLDER_NAME = "skills"
diff --git a/api/db/db_models.py b/api/db/db_models.py
index b1ead35fea..74dc5e5c0b 100644
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@@ -269,19 +269,13 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
                 return super().execute_sql(sql, params, commit)
             except (OperationalError, InterfaceError) as e:
                 error_codes = [2013, 2006]
-                error_messages = ['', 'Lost connection']
-                should_retry = (
-                    (hasattr(e, 'args') and e.args and e.args[0] in error_codes) or
-                    (str(e) in error_messages) or
-                    (hasattr(e, '__class__') and e.__class__.__name__ == 'InterfaceError')
-                )
+                error_messages = ["", "Lost connection"]
+                should_retry = (hasattr(e, "args") and e.args and e.args[0] in error_codes) or (str(e) in error_messages) or (hasattr(e, "__class__") and e.__class__.__name__ == "InterfaceError")
 
                 if should_retry and attempt < self.max_retries:
-                    logging.warning(
-                        f"Database connection issue (attempt {attempt+1}/{self.max_retries}): {e}"
-                    )
+                    logging.warning(f"Database connection issue (attempt {attempt + 1}/{self.max_retries}): {e}")
                     self._handle_connection_loss()
-                    time.sleep(self.retry_delay * (2 ** attempt))
+                    time.sleep(self.retry_delay * (2**attempt))
                 else:
                     logging.error(f"DB execution failure: {e}")
                     raise
@@ -311,20 +305,14 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
                 return super().begin()
             except (OperationalError, InterfaceError) as e:
                 error_codes = [2013, 2006]
-                error_messages = ['', 'Lost connection']
+                error_messages = ["", "Lost connection"]
 
-                should_retry = (
-                    (hasattr(e, 'args') and e.args and e.args[0] in error_codes) or
-                    (str(e) in error_messages) or
-                    (hasattr(e, '__class__') and e.__class__.__name__ == 'InterfaceError')
-                )
+                should_retry = (hasattr(e, "args") and e.args and e.args[0] in error_codes) or (str(e) in error_messages) or (hasattr(e, "__class__") and e.__class__.__name__ == "InterfaceError")
 
                 if should_retry and attempt < self.max_retries:
-                    logging.warning(
-                        f"Lost connection during transaction (attempt {attempt+1}/{self.max_retries})"
-                    )
+                    logging.warning(f"Lost connection during transaction (attempt {attempt + 1}/{self.max_retries})")
                     self._handle_connection_loss()
-                    time.sleep(self.retry_delay * (2 ** attempt))
+                    time.sleep(self.retry_delay * (2**attempt))
                 else:
                     raise
         return None
@@ -348,17 +336,14 @@ class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
                 # 08006: connection_failure
                 # 08003: connection_does_not_exist
                 # 08000: connection_exception
-                error_messages = ['connection', 'server closed', 'connection refused',
-                                'no connection to the server', 'terminating connection']
+                error_messages = ["connection", "server closed", "connection refused", "no connection to the server", "terminating connection"]
 
                 should_retry = any(msg in str(e).lower() for msg in error_messages)
 
                 if should_retry and attempt < self.max_retries:
-                    logging.warning(
-                        f"PostgreSQL connection issue (attempt {attempt+1}/{self.max_retries}): {e}"
-                    )
+                    logging.warning(f"PostgreSQL connection issue (attempt {attempt + 1}/{self.max_retries}): {e}")
                     self._handle_connection_loss()
-                    time.sleep(self.retry_delay * (2 ** attempt))
+                    time.sleep(self.retry_delay * (2**attempt))
                 else:
                     logging.error(f"PostgreSQL execution failure: {e}")
                     raise
@@ -385,17 +370,14 @@ class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
             try:
                 return super().begin()
             except (OperationalError, InterfaceError) as e:
-                error_messages = ['connection', 'server closed', 'connection refused',
-                                'no connection to the server', 'terminating connection']
+                error_messages = ["connection", "server closed", "connection refused", "no connection to the server", "terminating connection"]
 
                 should_retry = any(msg in str(e).lower() for msg in error_messages)
 
                 if should_retry and attempt < self.max_retries:
-                    logging.warning(
-                        f"PostgreSQL connection lost during transaction (attempt {attempt+1}/{self.max_retries})"
-                    )
+                    logging.warning(f"PostgreSQL connection lost during transaction (attempt {attempt + 1}/{self.max_retries})")
                     self._handle_connection_loss()
-                    time.sleep(self.retry_delay * (2 ** attempt))
+                    time.sleep(self.retry_delay * (2**attempt))
                 else:
                     raise
         return None
@@ -407,6 +389,7 @@ class RetryingPooledOceanBaseDatabase(PooledMySQLDatabase):
     OceanBase is compatible with MySQL protocol, so we inherit from PooledMySQLDatabase.
     This class provides connection pooling and automatic retry for connection issues.
     """
+
     def __init__(self, *args, **kwargs):
         self.max_retries = kwargs.pop("max_retries", 5)
         self.retry_delay = kwargs.pop("retry_delay", 1)
@@ -421,20 +404,18 @@ class RetryingPooledOceanBaseDatabase(PooledMySQLDatabase):
                 # 2013: Lost connection to MySQL server during query
                 # 2006: MySQL server has gone away
                 error_codes = [2013, 2006]
-                error_messages = ['', 'Lost connection', 'gone away']
+                error_messages = ["", "Lost connection", "gone away"]
 
                 should_retry = (
-                    (hasattr(e, 'args') and e.args and e.args[0] in error_codes) or
-                    any(msg in str(e).lower() for msg in error_messages) or
-                    (hasattr(e, '__class__') and e.__class__.__name__ == 'InterfaceError')
+                    (hasattr(e, "args") and e.args and e.args[0] in error_codes)
+                    or any(msg in str(e).lower() for msg in error_messages)
+                    or (hasattr(e, "__class__") and e.__class__.__name__ == "InterfaceError")
                 )
 
                 if should_retry and attempt < self.max_retries:
-                    logging.warning(
-                        f"OceanBase connection issue (attempt {attempt+1}/{self.max_retries}): {e}"
-                    )
+                    logging.warning(f"OceanBase connection issue (attempt {attempt + 1}/{self.max_retries}): {e}")
                     self._handle_connection_loss()
-                    time.sleep(self.retry_delay * (2 ** attempt))
+                    time.sleep(self.retry_delay * (2**attempt))
                 else:
                     logging.error(f"OceanBase execution failure: {e}")
                     raise
@@ -462,20 +443,14 @@ class RetryingPooledOceanBaseDatabase(PooledMySQLDatabase):
                 return super().begin()
             except (OperationalError, InterfaceError) as e:
                 error_codes = [2013, 2006]
-                error_messages = ['', 'Lost connection']
+                error_messages = ["", "Lost connection"]
 
-                should_retry = (
-                    (hasattr(e, 'args') and e.args and e.args[0] in error_codes) or
-                    (str(e) in error_messages) or
-                    (hasattr(e, '__class__') and e.__class__.__name__ == 'InterfaceError')
-                )
+                should_retry = (hasattr(e, "args") and e.args and e.args[0] in error_codes) or (str(e) in error_messages) or (hasattr(e, "__class__") and e.__class__.__name__ == "InterfaceError")
 
                 if should_retry and attempt < self.max_retries:
-                    logging.warning(
-                        f"Lost connection during transaction (attempt {attempt+1}/{self.max_retries})"
-                    )
+                    logging.warning(f"Lost connection during transaction (attempt {attempt + 1}/{self.max_retries})")
                     self._handle_connection_loss()
-                    time.sleep(self.retry_delay * (2 ** attempt))
+                    time.sleep(self.retry_delay * (2**attempt))
                 else:
                     raise
         return None
@@ -500,13 +475,11 @@ class BaseDataBase:
         db_name = database_config.pop("name")
 
         pool_config = {
-            'max_retries': 5,
-            'retry_delay': 1,
+            "max_retries": 5,
+            "retry_delay": 1,
         }
         database_config.update(pool_config)
-        self.database_connection = PooledDatabase[settings.DATABASE_TYPE.upper()].value(
-            db_name, **database_config
-        )
+        self.database_connection = PooledDatabase[settings.DATABASE_TYPE.upper()].value(db_name, **database_config)
         # self.database_connection = PooledDatabase[settings.DATABASE_TYPE.upper()].value(db_name, **database_config)
         logging.info("init database on cluster mode successfully")
 
@@ -846,9 +819,7 @@ class TenantLLM(DataBaseModel):
 
     class Meta:
         db_table = "tenant_llm"
-        indexes = (
-            (("tenant_id", "llm_factory", "llm_name"), True),
-        )
+        indexes = ((("tenant_id", "llm_factory", "llm_name"), True),)
 
 
 class TenantLangfuse(DataBaseModel):
@@ -892,6 +863,10 @@ class Knowledgebase(DataBaseModel):
     raptor_task_finish_at = DateTimeField(null=True)
     mindmap_task_id = CharField(max_length=32, null=True, help_text="Mindmap task ID", index=True)
     mindmap_task_finish_at = DateTimeField(null=True)
+    artifact_task_id = CharField(max_length=32, null=True, help_text="Artifact compilation task ID", index=True)
+    artifact_task_finish_at = DateTimeField(null=True)
+    skill_task_id = CharField(max_length=32, null=True, help_text="Skill generation task ID", index=True)
+    skill_task_finish_at = DateTimeField(null=True)
 
     status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
 
@@ -964,6 +939,13 @@ class FileCommit(DataBaseModel):
     author_id = CharField(max_length=32, null=False, help_text="user who created the commit", index=True)
     file_count = IntegerField(default=0, help_text="number of files in this commit")
     tree_state = LongTextField(null=True, help_text="JSON snapshot of the full folder tree at this commit")
+    # ---- Artifact-commit extension ----
+    # Populated only for commits recorded via
+    # ``FileCommitService.record_page_edit`` (i.e. artifact-page saves).
+    # For workspace file commits both fields stay null and the ``message``
+    # column carries the commit body.
+    title = CharField(max_length=255, null=True, help_text="commit title (artifact-page edits)")
+    comments = TextField(null=True, help_text="commit body/description (artifact-page edits)")
 
     class Meta:
         db_table = "file_commit"
@@ -980,6 +962,14 @@ class FileCommitItem(DataBaseModel):
     new_location = CharField(max_length=255, null=True, help_text="new storage location")
     old_name = CharField(max_length=255, null=True, help_text="old file name (for rename)")
     new_name = CharField(max_length=255, null=True, help_text="new file name (for rename)")
+    # ---- Artifact-commit extension ----
+    # Populated only for artifact-page saves recorded via
+    # ``FileCommitService.record_page_edit``.
+    diff = LongTextField(null=True, help_text="pre-computed unified diff (artifact-page edits)")
+    content_after_storage = CharField(max_length=16, null=True, help_text="'minio' | 'es' — where the post-save blob lives", index=True)
+    content_after_location = CharField(max_length=512, null=True, help_text="storage key/id for the post-save blob")
+    slug_kwd = CharField(max_length=512, null=True, help_text="artifact page slug (<page_type>/<name>)", index=True)
+    page_type_kwd = CharField(max_length=32, null=True, help_text="artifact page type", index=True)
 
     class Meta:
         db_table = "file_commit_item"
@@ -988,6 +978,13 @@ class FileCommitItem(DataBaseModel):
         )
 
 
+# ``ArtifactCommit`` retired — artifact page history is now stored under
+# ``FileCommit`` + ``FileCommitItem`` via ``FileCommitService.record_page_edit``
+# (see the artifact-commit extension columns on those models above).
+# Pre-existing ``artifact_commit`` rows are intentionally left in place;
+# no code path reads them.
+
+
 class Task(DataBaseModel):
     id = CharField(max_length=32, primary_key=True)
     doc_id = CharField(max_length=32, null=False, index=True)
@@ -1146,6 +1143,35 @@ class MCPServer(DataBaseModel):
         db_table = "mcp_server"
 
 
+class CompilationTemplate(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    tenant_id = CharField(max_length=32, null=True, index=True)
+    group_id = CharField(max_length=32, null=True, index=True)
+    name = CharField(max_length=128, null=False, index=True)
+    description = TextField(null=True, default="")
+    kind = CharField(max_length=64, null=False, index=True)
+    config = JSONField(null=False, default=dict)  # callable default: each instance gets a fresh dict instead of sharing one mutable {}
+    is_builtin = BooleanField(null=False, default=False, index=True)
+    status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
+
+    class Meta:
+        db_table = "compilation_template"
+        indexes = ((("tenant_id", "name", "is_builtin", "status"), True),)  # NOTE(review): tenant_id is nullable and NULLs are distinct in MySQL/PostgreSQL unique indexes — confirm rows with NULL tenant_id are de-duplicated by the seeding code
+
+
+class CompilationTemplateGroup(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    tenant_id = CharField(max_length=32, null=False, index=True)
+    name = CharField(max_length=128, null=False, index=True)
+    description = TextField(null=True, default="")
+    scope = CharField(max_length=16, null=False, index=True, help_text="file | dataset")
+    status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
+
+    class Meta:
+        db_table = "compilation_template_group"
+        indexes = ((("tenant_id", "name", "status"), True),)
+
+
 class Search(DataBaseModel):
     id = CharField(max_length=32, primary_key=True)
     avatar = TextField(null=True, help_text="avatar base64 string")
@@ -1264,9 +1290,9 @@ class ChatChannel(DataBaseModel):
 
 
 class DateTimeTzField(CharField):
-    field_type = 'VARCHAR'
+    field_type = "VARCHAR"
 
-    def db_value(self, value: datetime|None) -> str|None:
+    def db_value(self, value: datetime | None) -> str | None:
         if value is not None:
             if value.tzinfo is not None:
                 return value.isoformat()
@@ -1274,11 +1300,12 @@ class DateTimeTzField(CharField):
                 return value.replace(tzinfo=timezone.utc).isoformat()
         return value
 
-    def python_value(self, value: str|None) -> datetime|None:
+    def python_value(self, value: str | None) -> datetime | None:
         if value is not None:
             dt = datetime.fromisoformat(value)
             if dt.tzinfo is None:
                 import pytz
+
                 return dt.replace(tzinfo=pytz.UTC)
             return dt
         return value
@@ -1307,6 +1334,7 @@ class SyncLogs(DataBaseModel):
 
 class EvaluationDataset(DataBaseModel):
     """Ground truth dataset for RAG evaluation"""
+
     id = CharField(max_length=32, primary_key=True)
     tenant_id = CharField(max_length=32, null=False, index=True, help_text="tenant ID")
     name = CharField(max_length=255, null=False, index=True, help_text="dataset name")
@@ -1323,6 +1351,7 @@ class EvaluationDataset(DataBaseModel):
 
 class EvaluationCase(DataBaseModel):
     """Individual test case in an evaluation dataset"""
+
     id = CharField(max_length=32, primary_key=True)
     dataset_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_datasets")
     question = TextField(null=False, help_text="test question")
@@ -1338,6 +1367,7 @@ class EvaluationCase(DataBaseModel):
 
 class EvaluationRun(DataBaseModel):
     """A single evaluation run"""
+
     id = CharField(max_length=32, primary_key=True)
     dataset_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_datasets")
     dialog_id = CharField(max_length=32, null=False, index=True, help_text="dialog configuration being evaluated")
@@ -1355,6 +1385,7 @@ class EvaluationRun(DataBaseModel):
 
 class EvaluationResult(DataBaseModel):
     """Result for a single test case in an evaluation run"""
+
     id = CharField(max_length=32, primary_key=True)
     run_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_runs")
     case_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_cases")
@@ -1375,7 +1406,7 @@ class Memory(DataBaseModel):
     avatar = TextField(null=True, help_text="avatar base64 string")
     tenant_id = CharField(max_length=32, null=False, index=True)
     memory_type = IntegerField(null=False, default=1, index=True, help_text="Bit flags (LSB->MSB): 1=raw, 2=semantic, 4=episodic, 8=procedural. E.g., 5 enables raw + episodic.")
-    storage_type = CharField(max_length=32, default='table', null=False, index=True, help_text="table|graph")
+    storage_type = CharField(max_length=32, default="table", null=False, index=True, help_text="table|graph")
     embd_id = CharField(max_length=128, null=False, index=False, help_text="embedding model ID")
     tenant_embd_id = IntegerField(null=True, help_text="id in tenant_llm", index=True)
     llm_id = CharField(max_length=128, null=False, index=False, help_text="chat model ID")
@@ -1391,14 +1422,17 @@ class Memory(DataBaseModel):
     class Meta:
         db_table = "memory"
 
+
 class SystemSettings(DataBaseModel):
     name = CharField(max_length=128, primary_key=True)
     source = CharField(max_length=32, null=False, index=False)
     data_type = CharField(max_length=32, null=False, index=False)
     value = TextField(null=False, help_text="Configuration value (JSON, string, etc.)")
+
     class Meta:
         db_table = "system_settings"
 
+
 class TenantModelProvider(DataBaseModel):
     id = CharField(max_length=32, primary_key=True)
     provider_name = CharField(max_length=128, null=False, index=False, help_text="LLM provider name")
@@ -1406,9 +1440,8 @@ class TenantModelProvider(DataBaseModel):
 
     class Meta:
         db_table = "tenant_model_provider"
-        indexes = (
-            (("tenant_id", "provider_name"), True),
-        )
+        indexes = ((("tenant_id", "provider_name"), True),)
+
 
 class TenantModelInstance(DataBaseModel):
     id = CharField(max_length=32, primary_key=True)
@@ -1444,6 +1477,7 @@ class TenantModelGroup(DataBaseModel):
     class Meta:
         db_table = "tenant_model_group"
 
+
 class TenantModelGroupMapping(DataBaseModel):
     group_id = CharField(max_length=32, null=False, index=True, help_text="Group ID")
     provider_id = CharField(max_length=32, null=False, index=False)
@@ -1462,12 +1496,9 @@ def alter_db_add_column(migrator, table_name, column_name, column_type):
         migrate(migrator.add_column(table_name, column_name, column_type))
     except OperationalError as ex:
         error_codes = [1060]
-        error_messages = ['Duplicate column name']
+        error_messages = ["Duplicate column name"]
 
-        should_skip_error = (
-                (hasattr(ex, 'args') and ex.args and ex.args[0] in error_codes) or
-                (str(ex) in error_messages)
-        )
+        should_skip_error = (hasattr(ex, "args") and ex.args and ex.args[0] in error_codes) or (str(ex) in error_messages)
 
         if not should_skip_error:
             logging.critical(f"Failed to add {settings.DATABASE_TYPE.upper()}.{table_name} column {column_name}, operation error: {ex}")
@@ -1476,6 +1507,7 @@ def alter_db_add_column(migrator, table_name, column_name, column_type):
         logging.critical(f"Failed to add {settings.DATABASE_TYPE.upper()}.{table_name} column {column_name}, error: {ex}")
         pass
 
+
 def alter_db_column_type(migrator, table_name, column_name, new_column_type):
     try:
         migrate(migrator.alter_column_type(table_name, column_name, new_column_type))
@@ -1483,6 +1515,7 @@ def alter_db_column_type(migrator, table_name, column_name, new_column_type):
         logging.critical(f"Failed to alter {settings.DATABASE_TYPE.upper()}.{table_name} column {column_name} type, error: {ex}")
         pass
 
+
 def alter_db_rename_column(migrator, table_name, old_column_name, new_column_name):
     try:
         migrate(migrator.rename_column(table_name, old_column_name, new_column_name))
@@ -1491,6 +1524,7 @@ def alter_db_rename_column(migrator, table_name, old_column_name, new_column_nam
         # logging.critical(f"Failed to rename {settings.DATABASE_TYPE.upper()}.{table_name} column {old_column_name} to {new_column_name}, error: {ex}")
         pass
 
+
 def migrate_add_unique_email(migrator):
     """Deduplicates user emails and add UNIQUE constraint to email column (idempotent)"""
     # step 0: check existing index state on user.email and prepare for unique constraint
@@ -1535,13 +1569,7 @@ def migrate_add_unique_email(migrator):
         duplicates = User.select(User.email).group_by(User.email).having(fn.COUNT(User.id) > 1).tuples()
         for (dup_email,) in duplicates:
             # Keep the superuser row, or the oldest row if there is no superuser
-            rows = list(
-                User
-                    .select(User.id)
-                    .where(User.email == dup_email)
-                    .order_by(User.is_superuser.desc(), User.create_time.asc())
-                    .tuples()
-            )
+            rows = list(User.select(User.id).where(User.email == dup_email).order_by(User.is_superuser.desc(), User.create_time.asc()).tuples())
             for (uid,) in rows[1:]:
                 new_email = f"{dup_email}_DUPLICATE_{uid[:8]}"
                 User.update(email=new_email).where(User.id == uid).execute()
@@ -1564,7 +1592,6 @@ def migrate_add_unique_email(migrator):
         logging.critical("Failed to add UNIQUE constraint on user.email: %s", ex)
 
 
-
 def update_tenant_llm_to_id_primary_key():
     """Add ID and set to primary key step by step."""
     if settings.DATABASE_TYPE.upper() == "POSTGRES":
@@ -1749,6 +1776,10 @@ def migrate_db():
     alter_db_add_column(migrator, "knowledgebase", "raptor_task_finish_at", CharField(null=True))
     alter_db_add_column(migrator, "knowledgebase", "mindmap_task_id", CharField(max_length=32, null=True, help_text="Mindmap task ID", index=True))
     alter_db_add_column(migrator, "knowledgebase", "mindmap_task_finish_at", CharField(null=True))
+    alter_db_add_column(migrator, "knowledgebase", "artifact_task_id", CharField(max_length=32, null=True, help_text="Artifact compilation task ID", index=True))
+    alter_db_add_column(migrator, "knowledgebase", "artifact_task_finish_at", DateTimeField(null=True))
+    alter_db_add_column(migrator, "knowledgebase", "skill_task_id", CharField(max_length=32, null=True, help_text="Skill generation task ID", index=True))
+    alter_db_add_column(migrator, "knowledgebase", "skill_task_finish_at", DateTimeField(null=True))
     alter_db_column_type(migrator, "tenant_llm", "api_key", TextField(null=True, help_text="API KEY"))
     alter_db_add_column(migrator, "tenant_llm", "status", CharField(max_length=1, null=False, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True))
     alter_db_add_column(migrator, "connector2kb", "auto_parse", CharField(max_length=1, null=False, default="1", index=False))
@@ -1779,6 +1810,14 @@ def migrate_db():
     alter_db_add_column(migrator, "tenant", "ocr_id", CharField(max_length=128, null=True, help_text="default ocr model ID", index=True))
     alter_db_column_type(migrator, "chat_channel", "status", IntegerField(default=1, index=True))
     alter_db_rename_column(migrator, "chat_channel", "dialog_id", "chat_id")
+    # ---- FileCommit / FileCommitItem: artifact-page commit extension ----
+    alter_db_add_column(migrator, "file_commit", "title", CharField(max_length=255, null=True))
+    alter_db_add_column(migrator, "file_commit", "comments", TextField(null=True))
+    alter_db_add_column(migrator, "file_commit_item", "diff", LongTextField(null=True))
+    alter_db_add_column(migrator, "file_commit_item", "content_after_storage", CharField(max_length=16, null=True, index=True))
+    alter_db_add_column(migrator, "file_commit_item", "content_after_location", CharField(max_length=512, null=True))
+    alter_db_add_column(migrator, "file_commit_item", "slug_kwd", CharField(max_length=512, null=True, index=True))
+    alter_db_add_column(migrator, "file_commit_item", "page_type_kwd", CharField(max_length=32, null=True, index=True))
     # Drop both the explicit "idx_*" name from later migrations AND the
     # Peewee-auto-derived "<table-as-classname>_<col1>_<col2>" name from the
     # original TenantModelInstance definition (commit dc4b82523). Databases
diff --git a/api/db/init_data.py b/api/db/init_data.py
index d0fb4b9d5d..c755565a39 100644
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@@ -21,11 +21,11 @@ import time
 import uuid
 
 from peewee import IntegrityError
-
 from api.db import UserTenantRole
 from api.db.db_models import init_database_tables as init_web_db
 from api.db.services import UserService
 from api.db.services.canvas_service import CanvasTemplateService
+from api.db.services.compilation_template_service import CompilationTemplateService
 from api.db.services.document_service import DocumentService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
@@ -43,6 +43,7 @@ DEFAULT_SUPERUSER_NICKNAME = os.getenv("DEFAULT_SUPERUSER_NICKNAME", "admin")
 DEFAULT_SUPERUSER_EMAIL = os.getenv("DEFAULT_SUPERUSER_EMAIL", "admin@ragflow.io")
 DEFAULT_SUPERUSER_PASSWORD = os.getenv("DEFAULT_SUPERUSER_PASSWORD", "admin")
 
+
 def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_EMAIL, password=DEFAULT_SUPERUSER_PASSWORD, role=UserTenantRole.OWNER):
     if UserService.query(email=email):
         logging.info("User with email %s already exists, skipping initialization.", email)
@@ -67,12 +68,7 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_
         "img2txt_id": settings.IMAGE2TEXT_MDL,
         "rerank_id": settings.RERANK_MDL,
     }
-    usr_tenant = {
-        "tenant_id": user_info["id"],
-        "user_id": user_info["id"],
-        "invited_by": user_info["id"],
-        "role": role
-    }
+    usr_tenant = {"tenant_id": user_info["id"], "user_id": user_info["id"], "invited_by": user_info["id"], "role": role}
 
     try:
         if not UserService.save(**user_info):
@@ -83,15 +79,14 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_
         return
     TenantService.insert(**tenant)
     UserTenantService.insert(**usr_tenant)
-    logging.info(
-        f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.")
+    logging.info(f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.")
 
     if tenant["llm_id"]:
         chat_model_config = get_tenant_default_model_by_type(tenant["id"], LLMType.CHAT)
         chat_mdl = LLMBundle(tenant["id"], chat_model_config)
         msg = asyncio.run(chat_mdl.async_chat(system="", history=[{"role": "user", "content": "Hello!"}], gen_conf={}))
         if msg.find("ERROR: ") == 0:
-            logging.error("'{}' doesn't work. {}".format( tenant["llm_id"], msg))
+            logging.error("'{}' doesn't work. {}".format(tenant["llm_id"], msg))
 
     if tenant["embd_id"]:
         embd_model_config = get_tenant_default_model_by_type(tenant["id"], LLMType.EMBEDDING)
@@ -111,7 +106,6 @@ def update_document_number_in_init():
         KnowledgebaseService.update_document_number_in_init(kb_id=kb_id, doc_num=doc_count.get(kb_id, 0))
 
 
-
 def add_graph_templates():
     dir = os.path.join(get_project_base_directory(), "agent", "templates")
     CanvasTemplateService.filter_delete([1 == 1])
@@ -136,6 +130,10 @@ def add_graph_templates():
             logging.exception("Add agent templates error for %s: %s", template_path, e)
 
 
+def add_compilation_templates():
+    CompilationTemplateService.seed_builtins_from_files()
+
+
 def init_web_data():
     start_time = time.time()
 
@@ -147,11 +145,13 @@ def init_web_data():
     #    init_superuser()
 
     add_graph_templates()
+    add_compilation_templates()
     init_message_id_sequence()
     init_memory_size_cache()
     fix_missing_tokenized_memory()
     logging.info("init web data success:{}".format(time.time() - start_time))
 
+
 def init_table():
     # init system_settings
     with open(os.path.join(get_project_base_directory(), "conf", "system_settings.json"), "r") as f:
@@ -178,6 +178,6 @@ def init_table():
             raise e
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     init_web_db()
     init_web_data()
diff --git a/api/db/init_data/compilation_templates/artifacts.yaml b/api/db/init_data/compilation_templates/artifacts.yaml
new file mode 100644
index 0000000000..42dc1f39e3
--- /dev/null
+++ b/api/db/init_data/compilation_templates/artifacts.yaml
@@ -0,0 +1,136 @@
+kind: artifacts
+display_name: Artifacts — Graph-based wiki
+config:
+  kind: artifacts
+  example: |
+      - Each page must be a proper encyclopedic article, NOT a flat bullet list:
+      - 1. Opening paragraph (2-4 sentences defining what this is). No heading.
+      - 2. Sections with H2 headings, each starting with prose before sub-bullets.
+      - 3. Bold key terms on first use; link them with [[ ]] artifactlinks.
+      - 4. Examples or implications where the source provides them.
+      - 5. ## See also section at the end with artifactlinks to highly related pages (fewer than 12).
+      - Page structure could be as follows: (Not provided)
+  entity:
+    description: >-
+      You are a robust graph entity extractor for knowledge graphs.
+    fields:
+      - type: person
+        description: A natural person (individual human).
+        rule: |
+          - Full name preferred (e.g., "Elon Musk", not "Musk" alone if ambiguous).
+          - Include titles only if integral to identity (e.g., "Dr. Smith").
+          - Max length: 60 characters.
+      - type: org
+        description: Organization, company, institution, agency, or any collective group.
+        rule: |
+          - Use the official name when possible (e.g., "United Nations").
+          - Abbreviations accepted if widely known (e.g., "UN", "NASA").
+          - Max length: 80 characters.
+      - type: product
+        description: Tangible or intangible product, service, software, or offering.
+        rule: |
+          - Include version numbers if relevant (e.g., "iPhone 14").
+          - Generic categories (e.g., "smartphone") only if no specific name is given.
+          - Max length: 100 characters.
+      - type: regulation
+        description: Law, policy, standard, guideline, or regulatory document.
+        rule: |
+          - Use official title or identifier (e.g., "GDPR", "OSHA standard 1910").
+          - Include jurisdiction if known (e.g., "EU GDPR").
+          - Max length: 120 characters.
+      - type: location
+        description: Geographic place (country, city, address, region, natural feature).
+        rule: |
+          - Hierarchical format allowed (e.g., "Paris, France").
+          - Avoid overly vague terms (e.g., "there") unless resolved.
+          - Max length: 80 characters.
+      - type: system
+        description: Technical system, platform, framework, or infrastructure.
+        rule: |
+          - Distinct from product: system implies integrated environment
+            (e.g., "Linux OS", "power grid").
+          - Use proper naming.
+          - Max length: 100 characters.
+      - type: equipment
+        description: Physical device, machinery, hardware, or tool.
+        rule: |
+          - Specific model preferred (e.g., "Boeing 737").
+          - Generic allowed only if precise type (e.g., "drill press").
+          - Max length: 80 characters.
+      - type: other
+        description: Entities that do not fit any above category.
+        rule: |
+          - Use sparingly; prefer mapping to a defined type when possible.
+          - Still provide a meaningful label.
+          - Max length: 80 characters.
+  relation:
+    description: >-
+      You are an expert in extracting semantic relations between entities.
+    fields:
+      - type: owns
+        description: Ownership or possession (legal or de facto).
+        rule: |
+          - Direction from owner to owned: (A owns B).
+          - Example: "Company A owns product B".
+      - type: part_of
+        description: Mereological relation — component to whole.
+        rule: |
+          - Direction from part to whole: (A part_of B).
+          - Example: "Engine part_of car".
+      - type: caused_by
+        description: Causal relation — event, action, or state leads to another.
+        rule: |
+          - Direction from effect to cause: (A caused_by B) meaning B causes A.
+          - Example: "Accident caused_by brake failure".
+      - type: regulates
+        description: Regulatory or governing relation (law/standard controls entity).
+        rule: |
+          - Direction from regulator to regulated: (A regulates B).
+          - Example: "GDPR regulates data processing".
+      - type: uses
+        description: Utilization — an entity employs or consumes another entity.
+        rule: |
+          - Direction from user to used: (A uses B).
+          - Example: "System uses equipment".
+      - type: located_in
+        description: Spatial containment — entity situated inside a location.
+        rule: |
+          - Direction from located entity to containing location: (A located_in B).
+          - Example: "Office located_in city".
+      - type: other
+        description: Any meaningful relation not covered by the above types.
+        rule: |
+          - Provide an explicit label in a "relation_label" field.
+          - Direction must be clear.
+  claim:
+    fields:
+      - statement: >-
+          A complete factual sentence stated in the source. Any sentence of the form
+          'X is Y', 'X has Y', 'X does Y', 'X was founded in Y', 'X is located in Y',
+          'X reported Y', etc. is a claim. Aim for at least 1-3 claims per entity per
+          chunk that mentions it.
+        subject: >-
+          Entity/concept this claim is about (must match one of the entity/concept
+          names extracted above).
+  concept:
+    fields:
+      - term: >-
+          Concept name OR a thematic section topic (prefer the source's heading
+          wording when coherent).
+        definition_excerpt: >-
+          Verbatim or near-verbatim defining phrase from the chunk.
+  global_rules: |
+    - Each relation links two entities (subject → object) with a predicate type.
+    - Format: {"subject_id": "<entity_id>", "predicate": "<type>",
+      "object_id": "<entity_id>", "chunk_id": "<chunk_ID>"}.
+    - Both subject and object must be previously extracted entities.
+    - If ambiguous direction, choose the most logical default
+      (e.g., "part_of" always from part to whole).
+    - When multiple relations appear in a chunk, list all in order of appearance.
+    - Keep language consistent; relation type is always English (the given type name).
+    - Every extracted entity must have exactly one type from the list.
+    - Entity label (the text representing the entity) is required, non-empty.
+    - Format: {"entity_id": "<unique_id>", "type": "<type>", "label": "<label>",
+      "chunk_id": "<chunk_ID>"}.
+    - If no entity in a chunk, output no entity for that chunk.
+    - Keep the chunks' original language (Chinese/English etc.) for entities and relations.
diff --git a/api/db/init_data/compilation_templates/empty.yaml b/api/db/init_data/compilation_templates/empty.yaml
new file mode 100644
index 0000000000..4be03bac6b
--- /dev/null
+++ b/api/db/init_data/compilation_templates/empty.yaml
@@ -0,0 +1,17 @@
+kind: empty
+display_name: Empty
+config:
+  kind: empty
+  entity:
+    description: ''
+    fields:
+      - type: ''
+        description: ''
+        rule: ''
+  relation:
+    description: ''
+    fields:
+      - type: ''
+        description: ''
+        rule: ''
+  global_rules: ''
diff --git a/api/db/init_data/compilation_templates/knowledge_graph.yaml b/api/db/init_data/compilation_templates/knowledge_graph.yaml
new file mode 100644
index 0000000000..27a8535dd1
--- /dev/null
+++ b/api/db/init_data/compilation_templates/knowledge_graph.yaml
@@ -0,0 +1,74 @@
+kind: knowledge_graph
+display_name: Knowledge graph
+config:
+  kind: knowledge_graph
+  entity:
+    description: >-
+      You are a robust graph entity extractor for knowledge graphs.
+    fields:
+      - type: person
+        description: A natural person (individual human).
+        rule: |
+          - Full name preferred (e.g., "Elon Musk", not "Musk" alone if ambiguous).
+          - Include titles only if integral to identity (e.g., "Dr. Smith").
+          - Max length: 60 characters.
+      - type: org
+        description: Organization, company, institution, agency, or any collective group.
+        rule: |
+          - Use the official name when possible (e.g., "United Nations").
+          - Abbreviations accepted if widely known (e.g., "UN", "NASA").
+          - Max length: 80 characters.
+      - type: product
+        description: Tangible or intangible product, service, software, or offering.
+        rule: |
+          - Include version numbers if relevant (e.g., "iPhone 14").
+          - Generic categories (e.g., "smartphone") only if no specific name is given.
+          - Max length: 100 characters.
+      - type: regulation
+        description: Law, policy, standard, guideline, or regulatory document.
+        rule: |
+          - Use official title or identifier (e.g., "GDPR", "OSHA standard 1910").
+          - Include jurisdiction if known (e.g., "EU GDPR").
+          - Max length: 120 characters.
+      - type: location
+        description: Geographic place (country, city, address, region, natural feature).
+        rule: |
+          - Hierarchical format allowed (e.g., "Paris, France").
+          - Avoid overly vague terms (e.g., "there") unless resolved.
+          - Max length: 80 characters.
+      - type: other
+        description: Entities that do not fit any above category.
+        rule: |
+          - Use sparingly; prefer mapping to a defined type when possible.
+          - Still provide a meaningful label.
+          - Max length: 80 characters.
+  relation:
+    description: >-
+      You are an expert in extracting semantic relations between entities.
+    fields:
+      - type: owns
+        description: Ownership or possession (legal or de facto).
+        rule: |
+          - Direction from owner to owned: (A owns B).
+      - type: part_of
+        description: Mereological relation — component to whole.
+        rule: |
+          - Direction from part to whole: (A part_of B).
+      - type: caused_by
+        description: Causal relation — event, action, or state leads to another.
+        rule: |
+          - Direction from effect to cause: (A caused_by B) meaning B causes A.
+      - type: regulates
+        description: Regulatory or governing relation (law/standard controls entity).
+        rule: |
+          - Direction from regulator to regulated: (A regulates B).
+      - type: located_in
+        description: Spatial containment — entity situated inside a location.
+        rule: |
+          - Direction from located entity to containing location: (A located_in B).
+      - type: other
+        description: Any meaningful relation not covered by the above types.
+        rule: |
+          - Provide an explicit label in a "relation_label" field.
+          - Direction must be clear.
+  global_rules: ''
diff --git a/api/db/init_data/compilation_templates/mind_map.yaml b/api/db/init_data/compilation_templates/mind_map.yaml
new file mode 100644
index 0000000000..383be48e6e
--- /dev/null
+++ b/api/db/init_data/compilation_templates/mind_map.yaml
@@ -0,0 +1,69 @@
+kind: mind_map
+display_name: Mind map - Radial concept hierarchy
+config:
+  kind: mind_map
+  entity:
+    description: >-
+      You are a robust mind-map extractor. Extract the central concept,
+      major branches, and supporting sub-branches needed to build a
+      non-linear visual hierarchy around the main subject.
+    fields:
+      - type: central_topic
+        description: The main idea, goal, problem, or subject placed at the center of the mind map.
+        rule: |
+          - Prefer a concise noun phrase that represents the whole chunk or document section.
+          - Use the source wording when it clearly names the topic.
+          - If no central topic can be inferred, output "-1".
+          - Max length: 80 characters.
+      - type: branch
+        description: A major theme, category, or dimension radiating from the central topic.
+        rule: |
+          - Use broad categories that organize multiple related details.
+          - Avoid duplicating the central topic as a branch.
+          - Keep labels concise and parallel when possible.
+          - Max length: 80 characters.
+      - type: sub_branch
+        description: A specific detail, task, supporting concept, example, or item under a branch.
+        rule: |
+          - Attach each sub-branch to the most specific relevant parent branch.
+          - Preserve important source terms, names, quantities, and constraints.
+          - Split unrelated details into separate sub-branches.
+          - Max length: 120 characters.
+      - type: keyword
+        description: A compact supporting word or phrase that clarifies a branch or sub-branch.
+        rule: |
+          - Use only meaningful terms that improve scanability.
+          - Do not include filler words or generic labels.
+          - Max length: 60 characters.
+  relation:
+    description: >-
+      You are an expert hierarchical reasoning assistant specializing in
+      mind-map structure. Link each concept to its parent so the result
+      forms an intuitive radial hierarchy.
+    fields:
+      - type: has_branch
+        description: The central topic contains a major branch.
+        rule: |
+          - Direction from central topic to branch: (A has_branch B).
+          - Use only for first-level branches radiating from the center.
+      - type: has_sub_branch
+        description: A branch or sub-branch contains a more specific child concept.
+        rule: |
+          - Direction from parent to child: (A has_sub_branch B).
+          - Use recursively for second-level and deeper details.
+      - type: supports
+        description: A keyword, example, task, or detail supports a parent concept.
+        rule: |
+          - Direction from supporting item to supported concept: (A supports B).
+          - Use when the item explains, evidences, or clarifies the parent.
+      - type: related_to
+        description: A cross-link between two concepts that are associated but not parent-child.
+        rule: |
+          - Use sparingly; prefer parent-child hierarchy when possible.
+          - Direction may follow the stronger explanatory dependency.
+  global_rules: |
+    - Build a mind map around one central concept whenever possible.
+    - Arrange major themes as first-level branches and details as recursive sub-branches.
+    - Keep labels short enough for visual nodes; avoid sentence-length node names.
+    - Preserve a clear hierarchy; do not create cycles in parent-child relations.
+    - Use the same language as the source text unless normalization is necessary.
diff --git a/api/db/init_data/compilation_templates/page_index.yaml b/api/db/init_data/compilation_templates/page_index.yaml
new file mode 100644
index 0000000000..a21ed8eed9
--- /dev/null
+++ b/api/db/init_data/compilation_templates/page_index.yaml
@@ -0,0 +1,33 @@
+kind: page_index
+display_name: PageIndex — Hierarchical table of contents
+config:
+  kind: page_index
+  entity:
+    description: >-
+      You are a robust Table-of-Contents (TOC) extractor.
+    fields:
+      - type: title
+        description: the heading text (clean, no page numbers or leader dots)
+        rule: |
+          - Length restriction:
+              • Chinese heading: ≤25 characters
+              • English heading: ≤80 characters
+          - "title" must be non-empty (or exactly "-1").
+          - If any part of a chunk has no valid heading, output that part as {"title":"-1", ...}.
+          - If a chunk contains multiple headings, expand them in order:
+              - Each heading → {"title":"...","chunk_id":"<chunk_ID>"}.
+          - When ambiguous, prefer "-1" unless the text strongly looks like a heading.
+          - Keep language of "title" the same as the input.
+          - Prefix like following must be titles: 第x章, 第N条, 第N节, 1, 1.1, 1.1.1 ...
+  relation:
+    description: >-
+      You are an expert logical reasoning assistant specializing in hierarchical titles.
+    fields:
+      - type: include
+        description: Upper-level title includes lower-level title.
+        rule: |
+          - "-1" is an invalid title; it does not belong to or include any other titles.
+          - Must follow the hierarchical index/numbering (e.g., "1", "2.1", "3.2.5") when present.
+          - Keep language of "title" the same as the input.
+          - 第N章 must include 第N条 or 第N节.
+  global_rules: ''
diff --git a/api/db/init_data/compilation_templates/timeline.yaml b/api/db/init_data/compilation_templates/timeline.yaml
new file mode 100644
index 0000000000..00bee1485d
--- /dev/null
+++ b/api/db/init_data/compilation_templates/timeline.yaml
@@ -0,0 +1,48 @@
+kind: timeline
+display_name: List (Timeline) — Chronological events / Graph
+config:
+  kind: timeline
+  entity:
+    description: >-
+      You are a robust events-timeline extractor.
+    fields:
+      - type: timestamp
+        description: the date or time reference (clean, no extraneous text)
+        rule: |
+          - Format: prefer ISO 8601 (YYYY-MM-DD) or a normalized human-readable form
+            (e.g., "March 5, 2024").
+          - If only a relative time (e.g., "yesterday", "next week"), convert to
+            absolute when the context allows, else keep as is.
+          - "timestamp" must be non-empty (or exactly "-1" if no valid time/date).
+          - If a chunk contains multiple events with distinct timestamps, expand them
+            in chronological order:
+              - Each event → {"timestamp":"...","event":"...","chunk_id":"<chunk_ID>"}.
+          - When ambiguous, prefer "-1" unless the text strongly indicates a specific
+            time/date.
+          - Keep language and numbering style of "timestamp" consistent with the input.
+      - type: event
+        description: the event description associated with the timestamp (concise, no metadata)
+        rule: |
+          - Length restriction:
+              • Chinese event: ≤40 characters
+              • English event: ≤120 characters
+          - "event" must be non-empty (or exactly "-1" if no valid event description).
+          - If no valid event but a timestamp exists, output
+            {"timestamp":"...","event":"-1", ...}.
+          - Preserve the core action and key entities; omit redundant phrasing.
+  relation:
+    description: >-
+      You are an expert sequential reasoning assistant specializing in chronological
+      timelines.
+    fields:
+      - type: ordered
+        description: Events are arranged in strict chronological order (earliest to latest).
+        rule: |
+          - "-1" for timestamp or event indicates invalid or missing data; such entries
+            do not participate in ordering.
+          - Must follow explicit or inferred temporal indicators
+            (e.g., "then", "afterwards", "at 3 PM").
+          - If multiple events share the same timestamp, preserve their textual order
+            as a sub-list.
+          - Keep language and date formatting consistent across the timeline.
+  global_rules: ''
diff --git a/api/db/init_data/compilation_templates/tree.yaml b/api/db/init_data/compilation_templates/tree.yaml
new file mode 100644
index 0000000000..e241c929a1
--- /dev/null
+++ b/api/db/init_data/compilation_templates/tree.yaml
@@ -0,0 +1,31 @@
+kind: tree
+display_name: Tree — RAPTOR-based document tree
+description: >-
+  Recursive Abstractive Processing for Tree-Organized Retrieval over a
+  single document's chunks. Produces a hierarchical summary tree
+  (root summary → cluster summaries → leaf cluster source_chunk_ids).
+config:
+  kind: tree
+  raptor:
+    prompt: |-
+      Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:
+            {cluster_content}
+      The above is the content you need to summarize.
+    max_token: 512
+    threshold: 0.1
+  # Entity / relation collections are not used by the tree kind but kept
+  # as empty stubs so the form schema's shared validators see a stable
+  # shape regardless of kind.
+  entity:
+    description: ''
+    fields:
+      - type: ''
+        description: ''
+        rule: ''
+  relation:
+    description: ''
+    fields:
+      - type: ''
+        description: ''
+        rule: ''
+  global_rules: ''
diff --git a/api/db/services/compilation_template_group_service.py b/api/db/services/compilation_template_group_service.py
new file mode 100644
index 0000000000..cd5d5c7efd
--- /dev/null
+++ b/api/db/services/compilation_template_group_service.py
@@ -0,0 +1,394 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+from peewee import fn
+
+from api.db.db_models import DB, CompilationTemplate, CompilationTemplateGroup
+from api.db.services.common_service import CommonService
+from common.constants import StatusEnum
+from common.misc_utils import get_uuid
+
+
+SCOPE_FILE = "file"
+SCOPE_DATASET = "dataset"
+
+
+class GroupValidationError(ValueError):
+    pass
+
+
+def _derive_scope(templates: list[dict]) -> str:
+    """Work out a group's scope from the kinds of its child templates.
+
+    A single ``artifacts`` child makes the group dataset-scoped; every other
+    combination is file-scoped, and ``artifacts`` may not be mixed with siblings.
+    """
+    if not templates:
+        raise GroupValidationError("A template group must contain at least one template.")
+    kinds = [str((item or {}).get("kind") or "").strip() for item in templates]
+    if "artifacts" not in kinds:
+        # No artifacts child: file scope, subject to the re-chunking limit.
+        _enforce_single_rechunk_tree(templates)
+        return SCOPE_FILE
+    # An artifacts child is present, so any second child is an illegal mix.
+    if len(kinds) > 1:
+        raise GroupValidationError("An artifacts template cannot be combined with other templates in the same group.")
+    return SCOPE_DATASET
+
+
+def _enforce_single_rechunk_tree(templates: list[dict]) -> None:
+    """Reject a group in which more than one ``tree`` child enables re-chunking.
+
+    Re-chunking soft-deletes the doc's original chunks via
+    ``available_int=0`` and inserts merged replacements, so two such
+    templates would race on the same source chunks and produce
+    non-deterministic output. The limit applies to the given list of
+    templates (one group); it is enforced server-side here and
+    mirrored client-side in ``group-interface.ts``.
+    """
+    enabled = 0
+    for item in templates:
+        entry = item or {}
+        if str(entry.get("kind") or "").strip() == "tree":
+            # Missing or empty ``config`` / ``raptor`` sections count as disabled.
+            raptor_cfg = (entry.get("config") or {}).get("raptor") or {}
+            enabled += 1 if raptor_cfg.get("rechunk") else 0
+    if enabled <= 1:
+        return
+    raise GroupValidationError("Only one tree template in a group may enable re-chunking.")
+
+
+class CompilationTemplateGroupService(CommonService):
+    model = CompilationTemplateGroup
+
+    @classmethod
+    def ensure_table(cls) -> None:  # Create the model's table on first use if it is missing.
+        if not cls.model.table_exists():
+            cls.model.create_table(safe=True)
+
+    # ------------------------------------------------------------------
+    # Read paths
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def _group_to_dict(cls, group: CompilationTemplateGroup, templates: list[CompilationTemplate]) -> dict:  # Serialize a group row plus its child rows into one plain dict.
+        from api.db.services.compilation_template_service import CompilationTemplateService
+
+        return {
+            "id": group.id,
+            "name": group.name,
+            "description": group.description or "",  # a NULL/empty description is normalized to ""
+            "scope": group.scope,
+            "create_time": group.create_time,
+            "update_time": group.update_time,
+            "templates": [CompilationTemplateService._to_saved_dict(t) for t in templates],  # children keep the order supplied by the caller
+        }
+
+    @classmethod
+    @DB.connection_context()
+    def list_saved(cls, tenant_id: str, keywords: str = "", scope: str = "", orderby: str = "create_time", desc: bool = True) -> list[dict]:
+        """List the tenant's valid groups, each with its valid child templates.
+
+        ``keywords`` filters by name substring and ``scope`` by exact scope; empty
+        values disable the filter. ``orderby`` must name a model column, otherwise
+        ``create_time`` is used; ``desc`` selects descending or ascending order.
+        """
+        cls.ensure_table()
+        query = cls.model.select().where(
+            cls.model.tenant_id == tenant_id,
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        if keywords:
+            query = query.where(cls.model.name.contains(keywords))
+        if scope:
+            query = query.where(cls.model.scope == scope)
+        # hasattr() also accepts methods and metadata ("select", "_meta"), which
+        # have no .desc()/.asc(); only real columns are valid sort keys.
+        if orderby not in cls.model._meta.fields:
+            orderby = "create_time"
+        order_field = getattr(cls.model, orderby)
+        query = query.order_by(order_field.desc() if desc else order_field.asc())
+
+        groups = list(query)
+        if not groups:
+            return []
+
+        group_ids = [g.id for g in groups]
+        children = list(
+            CompilationTemplate.select()
+            .where(
+                CompilationTemplate.group_id.in_(group_ids),
+                CompilationTemplate.status == StatusEnum.VALID.value,
+            )
+            .order_by(CompilationTemplate.create_time.asc())
+        )
+        children_by_group: dict[str, list[CompilationTemplate]] = {gid: [] for gid in group_ids}
+        for child in children:
+            children_by_group.setdefault(child.group_id, []).append(child)
+        return [cls._group_to_dict(g, children_by_group.get(g.id, [])) for g in groups]
+
+    @classmethod
+    @DB.connection_context()
+    def get_saved(cls, group_id: str, tenant_id: str) -> dict | None:  # Fetch one valid group of the tenant with its valid children; None when no such group.
+        group = cls.model.get_or_none(  # NOTE(review): no ensure_table() call here, unlike the other public methods of this class — confirm intended
+            cls.model.id == group_id,
+            cls.model.tenant_id == tenant_id,
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        if not group:
+            return None
+        children = list(
+            CompilationTemplate.select()
+            .where(
+                CompilationTemplate.group_id == group_id,
+                CompilationTemplate.status == StatusEnum.VALID.value,  # children marked invalid are left out
+            )
+            .order_by(CompilationTemplate.create_time.asc())  # oldest child first
+        )
+        return cls._group_to_dict(group, children)
+
+    @classmethod
+    @DB.connection_context()
+    def list_for_resolution(cls, tenant_id: str) -> list[dict]:
+        """Light list used by frontend pickers (dataset parse-config dropdown).
+
+        Returns one dict per valid group of the tenant: id, name, description,
+        scope, and ``templates`` — the id/kind/name of each valid child template.
+        """
+        cls.ensure_table()
+        groups = list(
+            cls.model.select().where(
+                cls.model.tenant_id == tenant_id,
+                cls.model.status == StatusEnum.VALID.value,
+            )
+        )
+        if not groups:
+            return []  # nothing to resolve; skips the child query entirely
+        group_ids = [g.id for g in groups]
+        kid_pairs = list(
+            CompilationTemplate.select(  # only the columns the picker needs, not full rows
+                CompilationTemplate.group_id,
+                CompilationTemplate.id,
+                CompilationTemplate.kind,
+                CompilationTemplate.name,
+            ).where(
+                CompilationTemplate.group_id.in_(group_ids),
+                CompilationTemplate.status == StatusEnum.VALID.value,
+            )
+        )
+        by_group: dict[str, list[dict]] = {}  # group id -> list of child summaries
+        for child in kid_pairs:
+            by_group.setdefault(child.group_id, []).append({"id": child.id, "kind": child.kind, "name": child.name})
+        return [
+            {
+                "id": g.id,
+                "name": g.name,
+                "description": g.description or "",
+                "scope": g.scope,
+                "templates": by_group.get(g.id, []),  # groups without valid children get an empty list
+            }
+            for g in groups
+        ]
+
+    @classmethod
+    @DB.connection_context()
+    def name_exists(cls, tenant_id: str, name: str, exclude_id: str = "") -> bool:  # True when a valid group of this tenant already has exactly this name.
+        cls.ensure_table()
+        query = cls.model.select(fn.COUNT(cls.model.id)).where(
+            cls.model.tenant_id == tenant_id,
+            cls.model.name == name,
+            cls.model.status == StatusEnum.VALID.value,  # invalidated groups do not block reuse of a name
+        )
+        if exclude_id:
+            query = query.where(cls.model.id != exclude_id)  # leaves one group out of the count (e.g. the group being renamed)
+        return query.scalar() > 0
+
+    @classmethod
+    @DB.connection_context()
+    def resolve_template_ids(cls, group_id: str, tenant_id: str) -> list[str]:
+        """Resolve a group id to its valid child template ids, ordered by ``create_time``;
+        returns ``[]`` when no valid group matches this tenant. Used by the orchestrator when reading ``parser_config.compilation_template_group_id``.
+        """
+        cls.ensure_table()
+        group = cls.model.get_or_none(
+            cls.model.id == group_id,
+            cls.model.tenant_id == tenant_id,  # a group owned by another tenant resolves to nothing
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        if not group:
+            return []
+        rows = list(
+            CompilationTemplate.select(CompilationTemplate.id)  # ids only
+            .where(
+                CompilationTemplate.group_id == group_id,
+                CompilationTemplate.status == StatusEnum.VALID.value,
+            )
+            .order_by(CompilationTemplate.create_time.asc())
+        )
+        return [r.id for r in rows]
+
+    # ------------------------------------------------------------------
+    # Write paths
+    # ------------------------------------------------------------------
+
+    @classmethod
+    @DB.connection_context()
+    def create_group(cls, tenant_id: str, name: str, description: str, templates: list[dict]) -> dict:
+        """Create a group and its child templates atomically and return the saved group.
+
+        Raises ``GroupValidationError`` for an invalid template list or child, and
+        ``RuntimeError`` if the freshly written group cannot be read back.
+        """
+        cls.ensure_table()
+        scope = _derive_scope(templates)
+        group_id = get_uuid()
+        with DB.atomic():
+            CompilationTemplateGroup.create(id=group_id, tenant_id=tenant_id, name=name, description=description or "", scope=scope, status=StatusEnum.VALID.value)
+            for i, child in enumerate(templates):
+                cls._insert_child(group_id, tenant_id, child, index=i)
+        saved = cls.get_saved(group_id, tenant_id)
+        if saved is None:
+            # An ``assert`` would vanish under ``python -O`` and let ``None`` escape the ``-> dict`` contract.
+            raise RuntimeError(f"Compilation template group {group_id} could not be read back after creation.")
+        return saved
+
+    @classmethod
+    @DB.connection_context()
+    def update_group(  # Patch a group's name/description and optionally replace all of its children.
+        cls,
+        group_id: str,
+        tenant_id: str,
+        name: str | None,  # None leaves the stored name unchanged
+        description: str | None,  # None leaves the stored description unchanged
+        templates: list[dict] | None,  # None keeps the current children; a list replaces them all
+    ) -> dict | None:
+        cls.ensure_table()
+        existing = cls.model.get_or_none(
+            cls.model.id == group_id,
+            cls.model.tenant_id == tenant_id,
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        if not existing:
+            return None  # no valid group with this id for this tenant
+
+        with DB.atomic():  # group row and child rows are changed inside one atomic block
+            updates: dict = {}
+            if name is not None:
+                updates["name"] = name
+            if description is not None:
+                updates["description"] = description
+            if templates is not None:
+                updates["scope"] = _derive_scope(templates)  # raises GroupValidationError for an empty or invalid list
+            if updates:
+                cls.model.update(**updates).where(cls.model.id == group_id).execute()
+
+            if templates is not None:
+                # Soft-delete previous children (en-bloc replace). Simpler than
+                # diffing and acceptable given small N — child IDs are not
+                # referenced externally (parser_config keys the group, not its
+                # children).
+                CompilationTemplate.update(status=StatusEnum.INVALID.value).where(
+                    CompilationTemplate.group_id == group_id,
+                    CompilationTemplate.status == StatusEnum.VALID.value,
+                ).execute()
+                for i, child in enumerate(templates):
+                    cls._insert_child(group_id, tenant_id, child, index=i)  # new rows get fresh ids
+
+        return cls.get_saved(group_id, tenant_id)  # re-read so the result reflects the stored state
+
+    @classmethod
+    @DB.connection_context()
+    def delete_group(cls, group_id: str, tenant_id: str) -> bool:  # Soft-delete a group and its children; False when no valid group matches.
+        cls.ensure_table()
+        existing = cls.model.get_or_none(
+            cls.model.id == group_id,
+            cls.model.tenant_id == tenant_id,
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        if not existing:
+            return False
+        with DB.atomic():  # group and children are invalidated inside one atomic block
+            cls.model.update(status=StatusEnum.INVALID.value).where(cls.model.id == group_id).execute()  # rows are kept, only marked invalid
+            CompilationTemplate.update(status=StatusEnum.INVALID.value).where(
+                CompilationTemplate.group_id == group_id,
+                CompilationTemplate.status == StatusEnum.VALID.value,
+            ).execute()
+        return True
+
+    @classmethod
+    def _insert_child(cls, group_id: str, tenant_id: str, child: dict, *, index: int) -> None:
+        """Validate one child payload and insert it as a template row of ``group_id``.
+
+        ``child`` must carry a non-blank ``name`` and ``kind``; a missing ``config``
+        becomes ``{}`` and a non-dict one is rejected with ``GroupValidationError``.
+        The config goes through ``fill_config_default_llm`` before it is stored.
+
+        NOTE(review): ``index`` is accepted but never used here, so child order relies
+        on ``create_time`` alone when rows are read back — confirm that is sufficient.
+        """
+        payload = child or {}
+        kind = str(payload.get("kind") or "").strip()
+        name = str(payload.get("name") or "").strip()
+        config = payload.get("config") or {}
+        if not (kind and name and isinstance(config, dict)):
+            raise GroupValidationError("Each template must include a name, kind, and config object.")
+        from api.db.services.compilation_template_service import CompilationTemplateService
+        CompilationTemplate.create(
+            id=get_uuid(),
+            tenant_id=tenant_id,
+            group_id=group_id,
+            name=name,
+            description=str(payload.get("description") or ""),
+            kind=kind,
+            config=CompilationTemplateService.fill_config_default_llm(config, tenant_id),
+            is_builtin=False,
+            status=StatusEnum.VALID.value,
+        )
+
+    # ------------------------------------------------------------------
+    # Lookup helpers used by the orchestrator
+    # ------------------------------------------------------------------
+
+    @classmethod
+    @DB.connection_context()
+    def get_for_kb(cls, group_id: str, tenant_id: str) -> dict | None:
+        """Like ``get_saved`` but returns ``None`` quietly and avoids the
+        ``_to_saved_dict`` LLM-lookup branch — for orchestrator use where
+        only the scope and child ids are needed (no full child rows are returned).
+        """
+        cls.ensure_table()
+        group = cls.model.get_or_none(
+            cls.model.id == group_id,
+            cls.model.tenant_id == tenant_id,
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        if not group:
+            return None
+        children = list(
+            CompilationTemplate.select()
+            .where(
+                CompilationTemplate.group_id == group_id,
+                CompilationTemplate.status == StatusEnum.VALID.value,
+            )
+            .order_by(CompilationTemplate.create_time.asc())  # oldest child first
+        )
+        return {
+            "id": group.id,
+            "name": group.name,
+            "scope": group.scope,
+            "template_ids": [c.id for c in children],  # every valid child, in create_time order
+            "templates_by_kind": {c.kind: c.id for c in children},  # one id per kind: with several children of a kind, the last in create_time order wins
+        }
diff --git a/api/db/services/compilation_template_service.py b/api/db/services/compilation_template_service.py
new file mode 100644
index 0000000000..525b656a15
--- /dev/null
+++ b/api/db/services/compilation_template_service.py
@@ -0,0 +1,231 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import logging
+import os
+
+from peewee import fn
+from ruamel.yaml import YAML
+
+from api.db.db_models import CompilationTemplate, DB
+from api.db.services.common_service import CommonService
+from common.constants import StatusEnum
+from common.file_utils import get_project_base_directory
+
+
+class CompilationTemplateService(CommonService):
+    model = CompilationTemplate
+
+    @classmethod
+    def fill_config_default_llm(cls, config: dict, tenant_id: str | None) -> dict:
+        """Return ``config`` with ``llm_id`` defaulted from the tenant row when absent (the input is never mutated)."""
+        needs_fill = isinstance(config, dict) and not config.get("llm_id") and bool(tenant_id)
+        if not needs_fill:
+            return config
+        try:
+            # The import sits inside the try, so an import failure is logged rather than raised.
+            from api.db.services.user_service import TenantService
+
+            found, tenant_row = TenantService.get_by_id(tenant_id)
+            default_llm = getattr(tenant_row, "llm_id", None) if found else None
+            if default_llm:
+                config = {**config, "llm_id": default_llm}
+        except Exception:
+            logging.exception("compilation_template: llm_id default-fill lookup failed for tenant=%s", tenant_id)
+        return config
+
+    @classmethod
+    def fill_default_llm_for_templates(cls, templates: list[dict], tenant_id: str | None) -> list[dict]:
+        """Return shallow copies of ``templates`` whose ``config`` has passed through ``fill_config_default_llm``."""
+        if not tenant_id:
+            # Without a tenant the input list is handed back as-is (no copies).
+            return templates
+        return [
+            {**template, "config": cls.fill_config_default_llm(template.get("config") or {}, tenant_id)}
+            for template in templates
+        ]
+
+    @classmethod
+    def _sort_builtins(cls, templates: list[dict]) -> list[dict]:
+        """Return a new list ordered by display name, with the "empty" template pushed to the end."""
+
+        def sort_key(entry: dict) -> tuple[bool, str]:
+            is_empty = entry.get("kind") == "empty" or entry.get("id") == "empty"
+            return is_empty, entry.get("display_name") or entry.get("name") or ""
+
+        return sorted(templates, key=sort_key)
+
+    @classmethod
+    @DB.connection_context()
+    def ensure_table(cls) -> None:  # creates the model's table when it is missing
+        if not cls.model.table_exists():  # explicit existence probe first
+            cls.model.create_table(safe=True)  # safe=True: peewee adds IF NOT EXISTS to the CREATE TABLE
+
+    @classmethod
+    def _to_saved_dict(cls, template: CompilationTemplate) -> dict:
+        """Serialize a saved (tenant-owned) template row for API responses.
+
+        ``config["llm_id"]`` is lazily defaulted to the tenant's default
+        chat model so the frontend always sees a value (legacy templates
+        predate the field). The DB row is left untouched — this is a
+        read-side default. If the tenant has no default chat model set,
+        ``llm_id`` is silently left absent and the caller may fall back
+        however it likes.
+
+        The defaulting is delegated to ``fill_config_default_llm`` and runs
+        once per row. Repeating the same tenant lookup inline before the
+        delegation would query the tenant twice (and log a failure twice)
+        whenever the first attempt cannot supply an ``llm_id``.
+
+        Args:
+            template: The ``CompilationTemplate`` row to serialize.
+
+        Returns:
+            A dict with ``id``, ``name``, ``description``, ``kind``,
+            ``config``, ``create_time`` and ``update_time``.
+        """
+        data = template.to_dict()
+        config = data.get("config") or {}
+        return {
+            "id": data["id"],
+            "name": data["name"],
+            "description": data.get("description") or "",
+            "kind": data["kind"],
+            "config": cls.fill_config_default_llm(config, data.get("tenant_id")),
+            "create_time": data.get("create_time"),
+            "update_time": data.get("update_time"),
+        }
+
+    @classmethod
+    def _to_builtin_dict(cls, template: CompilationTemplate) -> dict:
+        """Serialize a builtin template row; the stored ``name`` is exposed as ``display_name``."""
+        row = template.to_dict()
+        payload = {"id": row["id"], "kind": row["kind"], "display_name": row["name"]}
+        payload.update(
+            description=row.get("description") or "",
+            config=row.get("config") or {},
+        )
+        return payload
+
+    @classmethod
+    @DB.connection_context()
+    def list_saved(cls, tenant_id: str, keywords: str = "", kind: str = "", orderby: str = "create_time", desc: bool = True) -> list[dict]:
+        """List the tenant's valid, non-builtin templates as saved dicts; an unknown ``orderby`` sorts by ``create_time``."""
+        query = cls.model.select().where(
+            cls.model.tenant_id == tenant_id,
+            # Needs a peewee expression: Python `not <field>` collapses to the bool False, so no row ever matched.
+            cls.model.is_builtin == False,  # noqa: E712
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        if keywords:
+            query = query.where(cls.model.name.contains(keywords))
+        if kind:
+            query = query.where(cls.model.kind == kind)
+        order_field = getattr(cls.model, orderby if hasattr(cls.model, orderby) else "create_time")
+        query = query.order_by(order_field.desc() if desc else order_field.asc())
+        return [cls._to_saved_dict(template) for template in query]
+
+    @classmethod
+    @DB.connection_context()
+    def list_builtins(cls) -> list[dict]:  # serializes every valid builtin template row
+        cls.ensure_table()  # creates the table first if it is missing
+        query = cls.model.select().where(cls.model.is_builtin, cls.model.status == StatusEnum.VALID.value).order_by(cls.model.create_time.asc(), cls.model.name.asc())
+        return cls._sort_builtins([cls._to_builtin_dict(template) for template in query])  # _sort_builtins re-sorts; the SQL order only breaks ties
+
+    @classmethod
+    @DB.connection_context()
+    def get_saved(cls, template_id: str, tenant_id: str) -> dict | None:
+        """Return the tenant's valid, non-builtin template as a saved dict, or ``None`` when no row matches."""
+        # The builtin test must be a peewee expression: Python `not <field>` is just the bool False,
+        # which turns the whole filter into a constant-false predicate.
+        owned = (cls.model.id == template_id) & (cls.model.tenant_id == tenant_id)
+        saved = (cls.model.is_builtin == False) & (cls.model.status == StatusEnum.VALID.value)  # noqa: E712
+        template = cls.model.get_or_none(owned & saved)
+        return cls._to_saved_dict(template) if template else None
+
+    @classmethod
+    @DB.connection_context()
+    def name_exists(cls, tenant_id: str, name: str, exclude_id: str = "") -> bool:
+        """Tell whether the tenant already has a valid, non-builtin template called ``name`` (``exclude_id`` skips one row)."""
+        query = cls.model.select(fn.COUNT(cls.model.id)).where(
+            cls.model.tenant_id == tenant_id,
+            cls.model.name == name,
+            cls.model.is_builtin == False,  # noqa: E712 - Python `not <field>` is the bool False, so nothing ever matched
+            cls.model.status == StatusEnum.VALID.value,
+        )
+        scoped = query.where(cls.model.id != exclude_id) if exclude_id else query
+        return scoped.scalar() > 0
+
+    @classmethod
+    @DB.connection_context()
+    def upsert_builtin(cls, template: dict) -> None:
+        """Insert the builtin ``template``, or overwrite the row already stored under its id.
+
+        ``template`` must provide ``id``, ``name``, ``kind`` and ``config``;
+        ``description`` is optional and defaults to an empty string. The row
+        is always written with no tenant, ``is_builtin=True`` and valid status.
+        """
+        template_id = template["id"]
+        already_stored = cls.model.get_or_none(cls.model.id == template_id)
+        row = {"id": template_id, "tenant_id": None, "name": template["name"]}
+        row["description"] = template.get("description", "")
+        row.update(kind=template["kind"], config=template["config"], is_builtin=True, status=StatusEnum.VALID.value)
+        # Update in place when the id is already present, insert otherwise.
+        if already_stored:
+            cls.update_by_id(template_id, row)
+        else:
+            cls.insert(**row)
+
+    @classmethod
+    def seed_builtins_from_files(cls) -> None:  # syncs the file-defined builtin templates into the table
+        cls.ensure_table()  # creates the table first if it is missing
+        for template in cls.load_builtins_from_files():
+            cls.upsert_builtin(template)  # inserts new ids, overwrites rows whose id already exists
+
+    @classmethod
+    def load_builtins_from_files(cls) -> list[dict]:
+        """Parse the bundled ``*.yaml`` / ``*.yml`` seed files into builtin template dicts.
+
+        A file is kept only when it defines ``kind``, ``display_name`` and a
+        mapping ``config``; the file stem becomes the template id. Files that
+        fail to load are logged and skipped. Returns the templates in
+        ``_sort_builtins`` order, or ``[]`` when the seed directory is absent.
+        """
+        seed_dir = os.path.join(get_project_base_directory(), "api", "db", "init_data", "compilation_templates")
+        if not os.path.exists(seed_dir):
+            logging.warning("Missing compilation templates: %s", seed_dir)
+            return []
+
+        loaded = []
+        parser = YAML(typ="safe", pure=True)
+        seed_files = [name for name in sorted(os.listdir(seed_dir)) if name.endswith((".yaml", ".yml"))]
+        for filename in seed_files:
+            template_path = os.path.join(seed_dir, filename)
+            # One bad file must not abort the rest: any failure is logged and the loop moves on.
+            try:
+                with open(template_path, "r", encoding="utf-8") as f:
+                    spec = parser.load(f) or {}
+                kind, display_name, config = spec.get("kind"), spec.get("display_name"), spec.get("config")
+                if not (kind and display_name and isinstance(config, dict)):
+                    logging.warning("Skipping invalid compilation template file: %s", template_path)
+                    continue
+                stem, _ext = os.path.splitext(filename)
+                entry = {"id": stem, "name": display_name, "description": spec.get("description", "")}
+                entry.update(kind=kind, config=config)
+                loaded.append(entry)
+            except Exception as e:
+                logging.exception("Add compilation template error for %s: %s", template_path, e)
+        return cls._sort_builtins(loaded)
diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index f7c53a9df3..cce2479f26 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -42,7 +42,7 @@ from api.utils.reference_metadata_utils import (
 from api.db.joint_services.tenant_model_service import get_tenant_default_model_by_type, get_model_config_from_provider_instance, get_model_type_by_name
 from common.time_utils import current_timestamp, datetime_format
 from common.text_utils import normalize_arabic_digits
-from rag.graphrag.general.mind_map_extractor import MindMapExtractor
+from rag.advanced_rag.knowlege_compile.mind_map_extractor import MindMapExtractor
 from rag.advanced_rag import DeepResearcher
 from rag.app.tag import label_question
 from rag.nlp.search import index_name
@@ -761,6 +761,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
     retrieval_ts = timer()
     if not knowledges and prompt_config.get("empty_response"):
         empty_res = prompt_config["empty_response"]
+        yield {"answer": empty_res, "reference": {}, "prompt": "", "audio_binary": None, "final": False}
         yield {"answer": empty_res, "reference": kbinfos, "prompt": "\n\n### Query:\n%s" % " ".join(questions), "audio_binary": tts(tts_mdl, empty_res), "final": True}
         return
 
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index d5bff3e200..1fae046c95 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -373,10 +373,14 @@ class DocumentService(CommonService):
     @DB.connection_context()
     def list_doc_headers_by_kb_and_source_type(cls, kb_id, source_type, page_size=500):
         fields = [cls.model.id, cls.model.kb_id, cls.model.source_type, cls.model.name]
-        docs = cls.model.select(*fields).where(
-            cls.model.kb_id == kb_id,
-            cls.model.source_type == source_type,
-        ).order_by(cls.model.create_time.asc())
+        docs = (
+            cls.model.select(*fields)
+            .where(
+                cls.model.kb_id == kb_id,
+                cls.model.source_type == source_type,
+            )
+            .order_by(cls.model.create_time.asc())
+        )
         offset = 0
         res = []
         while True:
@@ -402,10 +406,14 @@ class DocumentService(CommonService):
         rows and the resulting map would silently miss entries.
         """
         fields = [cls.model.id, cls.model.content_hash]
-        docs = cls.model.select(*fields).where(
-            cls.model.kb_id == kb_id,
-            cls.model.source_type == source_type,
-        ).order_by(cls.model.create_time.asc())
+        docs = (
+            cls.model.select(*fields)
+            .where(
+                cls.model.kb_id == kb_id,
+                cls.model.source_type == source_type,
+            )
+            .order_by(cls.model.create_time.asc())
+        )
         offset = 0
         result: dict[str, str] = {}
         while True:
@@ -489,6 +497,20 @@ class DocumentService(CommonService):
         except Exception as e:
             logging.error(f"Failed to delete chunks from doc store for document {doc.id}: {e}")
 
+        # Prune this doc's line from the KB's tree-kind navigation
+        # markdown (best-effort — the markdown is a downstream artifact,
+        # and failure here must not block the document delete).
+        try:
+            from rag.advanced_rag.knowlege_compile.dataset_nav import (
+                remove_dataset_nav_doc_sync,
+            )
+
+            remove_dataset_nav_doc_sync(tenant_id, doc.kb_id, doc.id)
+        except Exception as e:
+            logging.warning(
+                f"Failed to prune dataset_nav for document {doc.id}: {e}",
+            )
+
         # Delete document metadata (non-critical, log and continue)
         try:
             DocMetadataService.delete_document_metadata(doc.id, doc.kb_id, tenant_id)
@@ -571,21 +593,20 @@ class DocumentService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_unfinished_docs(cls):
-        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg,
-                  cls.model.run, cls.model.parser_id]
-        unfinished_task_query = Task.select(Task.doc_id).where(
-            (Task.progress >= 0) & (Task.progress < 1)
-        )
+        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg, cls.model.run, cls.model.parser_id]
+        unfinished_task_query = Task.select(Task.doc_id).where((Task.progress >= 0) & (Task.progress < 1))
         docs_with_non_failed_tasks = Task.select(Task.doc_id).where(Task.progress >= 0).distinct()
 
         docs = cls.model.select(*fields).where(
             cls.model.status == StatusEnum.VALID.value,
             ~(cls.model.type == FileType.VIRTUAL.value),
             ((cls.model.run.is_null(True)) | (cls.model.run != TaskStatus.CANCEL.value)),
-            (((cls.model.progress < 1) & (cls.model.progress > 0)) |
-             (cls.model.id.in_(unfinished_task_query)) |
-             ((cls.model.progress == -1) & (cls.model.run == TaskStatus.FAIL.value) &
-              (cls.model.id.in_(docs_with_non_failed_tasks)))))  # including GraphRAG/RAPTOR/Mindmap; re-sync failed docs
+            (
+                ((cls.model.progress < 1) & (cls.model.progress > 0))
+                | (cls.model.id.in_(unfinished_task_query))
+                | ((cls.model.progress == -1) & (cls.model.run == TaskStatus.FAIL.value) & (cls.model.id.in_(docs_with_non_failed_tasks)))
+            ),
+        )  # including GraphRAG/RAPTOR/Mindmap; re-sync failed docs
         return list(docs.dicts())
 
     @classmethod
@@ -604,8 +625,7 @@ class DocumentService(CommonService):
             )
             if num == 0:
                 logging.error(
-                    "increment_chunk_num: no document matched doc_id=%s kb_id=%s "
-                    "token_num=%s chunk_num=%s duration=%s",
+                    "increment_chunk_num: no document matched doc_id=%s kb_id=%s token_num=%s chunk_num=%s duration=%s",
                     doc_id,
                     kb_id,
                     token_num,
@@ -623,8 +643,7 @@ class DocumentService(CommonService):
             )
             if num == 0:
                 logging.error(
-                    "increment_chunk_num: no knowledgebase matched kb_id=%s for doc_id=%s "
-                    "token_num=%s chunk_num=%s duration=%s",
+                    "increment_chunk_num: no knowledgebase matched kb_id=%s for doc_id=%s token_num=%s chunk_num=%s duration=%s",
                     kb_id,
                     doc_id,
                     token_num,
@@ -660,8 +679,7 @@ class DocumentService(CommonService):
             )
             if num == 0:
                 logging.error(
-                    "decrement_chunk_num: no knowledgebase matched kb_id=%s for doc_id=%s "
-                    "token_num=%s chunk_num=%s duration=%s",
+                    "decrement_chunk_num: no knowledgebase matched kb_id=%s for doc_id=%s token_num=%s chunk_num=%s duration=%s",
                     kb_id,
                     doc_id,
                     token_num,
@@ -1071,7 +1089,7 @@ def queue_raptor_o_graphrag_tasks(sample_doc, ty, priority, fake_doc_id="", doc_
     """
     if doc_ids is None:
         doc_ids = []
-    assert ty in ["graphrag", "raptor", "mindmap"], "type should be graphrag, raptor or mindmap"
+    assert ty in ["graphrag", "raptor", "mindmap", "artifact", "skill"], "type should be graphrag, raptor, mindmap, artifact or skill"
 
     chunking_config = DocumentService.get_chunking_config(sample_doc["id"])
     hasher = xxhash.xxh64()
@@ -1102,6 +1120,51 @@ def queue_raptor_o_graphrag_tasks(sample_doc, ty, priority, fake_doc_id="", doc_
     return task["id"]
 
 
+def queue_per_doc_raptor_task(doc, priority):
+    """Queue a doc-scoped RAPTOR task and return the new task id.
+
+    Distinct from :func:`queue_raptor_o_graphrag_tasks` (which is KB-scoped
+    and uses ``GRAPH_RAPTOR_FAKE_DOC_ID`` as the task's ``doc_id`` so it
+    fans out across the dataset). Here the task's ``doc_id`` is the real
+    document id, so ``TaskHandler._run_raptor`` runs only on this doc's
+    chunks and the RAPTOR summaries it produces are scoped to this doc.
+
+    Triggered automatically at the tail of standard chunking when the
+    doc's ``parser_config["raptor"]["use_raptor"]`` is true. No
+    cross-task dedup — within one chunking-task execution this helper is
+    called at most once, which is the only invariant the caller needs.
+    """
+    doc_id = doc["id"]
+    chunking_config = DocumentService.get_chunking_config(doc_id)
+    digest = xxhash.xxh64()
+    for key in sorted(chunking_config.keys()):
+        digest.update(str(chunking_config[key]).encode("utf-8"))
+
+    task = {
+        "id": get_uuid(),
+        "doc_id": doc_id,
+        "from_page": MAXIMUM_TASK_PAGE_NUMBER,
+        "to_page": MAXIMUM_TASK_PAGE_NUMBER,
+        "task_type": "raptor",
+        "progress_msg": datetime.now().strftime("%H:%M:%S") + " created task raptor",
+        "begin_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+    }
+    for part in (task["doc_id"], task["from_page"], task["to_page"]):
+        digest.update(str(part).encode("utf-8"))
+    digest.update(b"raptor")
+    task["digest"] = digest.hexdigest()
+    bulk_insert_into_db(Task, [task], True)
+
+    # The queue message also carries ``doc_ids`` (added after the insert).
+    # Enqueue outside the ``assert``: under ``python -O`` asserts are
+    # stripped, and the call must not vanish with them.
+    task["doc_ids"] = [doc_id]
+    queue_name = settings.get_svr_queue_name(priority, "raptor")
+    enqueued = REDIS_CONN.queue_product(queue_name, message=task)
+    assert enqueued, "Can't access Redis. Please check the Redis' status."
+    return task["id"]
+
+
 def get_queue_length(priority, suffix="common"):
     group_info = REDIS_CONN.queue_info(settings.get_svr_queue_name(priority, suffix), SVR_CONSUMER_GROUP_NAME)
     if not group_info:
diff --git a/api/db/services/file_commit_service.py b/api/db/services/file_commit_service.py
index e22321ca9d..67cbbb4b59 100644
--- a/api/db/services/file_commit_service.py
+++ b/api/db/services/file_commit_service.py
@@ -15,11 +15,13 @@
 #
 
 import datetime
+import difflib
 import hashlib
 import json
 import logging
+from typing import Optional
 
-from api.db.db_models import DB, FileCommit, FileCommitItem, File
+from api.db.db_models import DB, FileCommit, FileCommitItem, File, User
 from api.db.services.common_service import CommonService
 from api.db.services.file_service import FileService
 from common import settings
@@ -29,6 +31,149 @@ from common.time_utils import current_timestamp, datetime_format
 logger = logging.getLogger(__name__)
 
 
+# ---------------------------------------------------------------------
+# Artifact-commit extension
+# ---------------------------------------------------------------------
+# Artifact-page saves used to land in the retired ``ArtifactCommit`` table.
+# They now flow through :class:`FileCommitService.record_page_edit`, which
+# writes one FileCommit + one FileCommitItem per save with the artifact
+# columns populated (title/comments on FileCommit; diff/content_after_*/
+# slug_kwd/page_type_kwd on FileCommitItem).
+#
+# ``file_id`` for these commits is a stable content-hash of ``(kb_id, slug)``
+# so per-page history queries can filter on it without a real File row —
+# no pseudo-File / virtual-folder machinery is created, so the workspace
+# UI stays free of ghost entries.
+#
+# ``folder_id`` is set to ``kb_id`` directly. The datasets URL prefix
+# (``/datasets/<kb_id>/commits``) resolves the entity id to itself for
+# this scope; workspace file-commit browsing still uses ``/folders/*`` or
+# ``/workspace/*`` with the real folder id.
+#
+# Content storage for ``content_after`` is switched by a module-level
+# constant so ops can move blobs between MinIO and the doc-store index
+# without touching the schema.
+ARTIFACT_CONTENT_STORAGE = "minio"  # one of {"minio", "es"}
+_ARTIFACT_COMMIT_BUCKET_PREFIX = ".artifact_commits"  # prefix of the object key passed to storage.put (kb_id is the first argument there)
+_ARTIFACT_ES_KWD = "artifact_commit_content"  # compile_kwd value stamped on doc-store rows that hold commit content
+
+
+def _artifact_file_id(kb_id: str, slug: str) -> str:
+    """Derive the fixed-width (32 hex chars) 'file' id shared by every commit of one artifact page.
+
+    It is the MD5 hex digest of ``"<kb_id>:<slug>"``: a grouping key, not the id of a real File row.
+    """
+    identity = f"{kb_id}:{slug}".encode("utf-8")
+    return hashlib.md5(identity).hexdigest()
+
+
+def _unified_diff(before: str, after: str, slug: str) -> str:
+    """Return a unified diff between two markdown strings, or '' if equal.
+
+    ``None`` counts as empty text. A last line without a line terminator is followed by git's
+    "\\ No newline at end of file" marker; otherwise it would fuse with the next diff line.
+    """
+    old_text, new_text = before or "", after or ""
+    if old_text == new_text:
+        return ""
+    lines = difflib.unified_diff(old_text.splitlines(keepends=True), new_text.splitlines(keepends=True), fromfile=f"a/{slug}", tofile=f"b/{slug}", n=3)
+    # difflib copies input lines verbatim, so only an unterminated final line compares equal to its terminator-free form.
+    marker = "\n\\ No newline at end of file\n"
+    return "".join(line if line != line.splitlines()[0] else line + marker for line in lines)
+
+
+def _store_content_after(kb_id: str, content: str) -> tuple[str, str]:
+    """Best-effort persist of an artifact page body; return ``(storage_kind, location)``.
+
+    The backend is picked by :data:`ARTIFACT_CONTENT_STORAGE` and the blob is
+    keyed by the SHA-256 of its UTF-8 bytes, so identical bodies map to the
+    same key. Write failures are logged, not raised: the returned pair is the
+    same whether or not the write succeeded. An unrecognised backend setting
+    is logged as a warning.
+
+    Args:
+        kb_id: Knowledge-base id; first argument of the storage write and the
+            input from which the doc-store index name is derived.
+        content: Page body to persist; ``None``/empty is stored as ``""``.
+
+    Returns:
+        ``("minio", "<prefix>/<sha256>")``, ``("es", "<sha256>")`` or ``("", "")``.
+    """
+    body = content or ""
+    content_bytes = body.encode("utf-8")
+    content_hash = hashlib.sha256(content_bytes).hexdigest()
+    backend = ARTIFACT_CONTENT_STORAGE
+
+    if backend == "minio":
+        location = f"{_ARTIFACT_COMMIT_BUCKET_PREFIX}/{content_hash}"
+        try:
+            storage = settings.STORAGE_IMPL
+            # With no storage configured nothing is written; the location is still returned.
+            if storage is not None:
+                storage.put(kb_id, location, content_bytes)
+        except Exception:
+            logging.exception("record_page_edit: MinIO put failed for kb=%s hash=%s", kb_id, content_hash)
+        return "minio", location
+
+    if backend == "es":
+        # Stored as one doc-store row (id = content hash) flagged ``available_int=0``.
+        from rag.nlp import search as _rag_search
+
+        index = _rag_search.index_name(kb_id)
+        payload = {
+            "id": content_hash,
+            "kb_id": kb_id,
+            "doc_id": kb_id,
+            "compile_kwd": _ARTIFACT_ES_KWD,
+            "content_with_weight": body,
+            "available_int": 0,
+        }
+        try:
+            settings.docStoreConn.insert([payload], index, kb_id)
+        except Exception:
+            logging.exception("record_page_edit: ES insert failed for kb=%s hash=%s", kb_id, content_hash)
+        return "es", content_hash
+
+    # Neither backend matched: report it and hand back an empty location.
+    logging.warning("record_page_edit: unknown ARTIFACT_CONTENT_STORAGE=%r; content not persisted", backend)
+    return "", ""
+
+
+def _read_content_after(kb_id: str, storage_kind: str, location: str) -> str:
+    """Load an artifact ``content_after`` blob written by the commit path.
+
+    Every failure mode collapses to ``""``: an empty ``location``, an
+    unknown ``storage_kind``, an unconfigured storage backend, a doc-store
+    result that is not a dict, or any exception raised while reading (the
+    exception is logged).
+
+    Args:
+        kb_id: First argument of the storage read; also the source of the doc-store index name.
+        storage_kind: ``"minio"`` or ``"es"``.
+        location: Object key (minio) or doc-store row id (es).
+    """
+    if not location:
+        return ""
+    try:
+        if storage_kind == "minio":
+            storage = settings.STORAGE_IMPL
+            if storage is None:
+                return ""
+            raw = storage.get(kb_id, location)
+            # Bytes are decoded leniently; anything else is stringified (falsy -> "").
+            is_binary = isinstance(raw, (bytes, bytearray))
+            return raw.decode("utf-8", errors="replace") if is_binary else str(raw or "")
+        if storage_kind == "es":
+            from rag.nlp import search as _rag_search
+
+            row = settings.docStoreConn.get(location, _rag_search.index_name(kb_id), [kb_id])
+            return (row.get("content_with_weight") or "") if isinstance(row, dict) else ""
+    except Exception:
+        logging.exception("get_page_commit: content read failed kb=%s storage=%s loc=%s", kb_id, storage_kind, location)
+    # Unknown storage kind, or the read failed and was logged above.
+    return ""
+
+
 def _get_file_parent_id(file_id):
     """Look up a file's parent_id from the File table."""
     try:
@@ -156,11 +301,13 @@ class FileCommitService(CommonService):
                     item["new_location"] = obj_key
 
                     # Update file record in DB
-                    File.update({
-                        "location": obj_key,
-                        "size": len(content_bytes),
-                        "update_time": current_timestamp(),
-                    }).where(File.id == file_id).execute()
+                    File.update(
+                        {
+                            "location": obj_key,
+                            "size": len(content_bytes),
+                            "update_time": current_timestamp(),
+                        }
+                    ).where(File.id == file_id).execute()
 
                     # Update tree state
                     file_parent = _get_file_parent_id(file_id)
@@ -196,11 +343,13 @@ class FileCommitService(CommonService):
                     item["new_location"] = obj_key
 
                     # Update file record
-                    File.update({
-                        "location": obj_key,
-                        "size": len(content_bytes),
-                        "update_time": current_timestamp(),
-                    }).where(File.id == file_id).execute()
+                    File.update(
+                        {
+                            "location": obj_key,
+                            "size": len(content_bytes),
+                            "update_time": current_timestamp(),
+                        }
+                    ).where(File.id == file_id).execute()
 
                     # Update tree state
                     file_parent = _get_file_parent_id(file_id)
@@ -222,9 +371,7 @@ class FileCommitService(CommonService):
                         item["old_location"] = old_location
 
                     # Soft-delete the file record
-                    File.update(status="0", update_time=current_timestamp()).where(
-                        File.id == file_id
-                    ).execute()
+                    File.update(status="0", update_time=current_timestamp()).where(File.id == file_id).execute()
 
                     # Remove from tree state (mark deleted)
                     if file_id in tree_state:
@@ -237,9 +384,7 @@ class FileCommitService(CommonService):
                     item["new_name"] = new_name
 
                     # Update the file record name
-                    File.update(name=new_name, update_time=current_timestamp()).where(
-                        File.id == file_id
-                    ).execute()
+                    File.update(name=new_name, update_time=current_timestamp()).where(File.id == file_id).execute()
 
                     # Update tree state
                     if file_id in tree_state:
@@ -259,9 +404,7 @@ class FileCommitService(CommonService):
     def _get_latest_commit(cls, folder_id):
         """Get the latest (chain head) commit for a folder."""
         try:
-            return cls.model.select().where(
-                cls.model.folder_id == folder_id
-            ).order_by(cls.model.create_time.desc()).first()
+            return cls.model.select().where(cls.model.folder_id == folder_id).order_by(cls.model.create_time.desc()).first()
         except Exception:
             return None
 
@@ -359,27 +502,31 @@ class FileCommitService(CommonService):
 
             if from_entry is not None and to_entry is None:
                 # Present in from, absent in to → deleted
-                diff.append({
-                    "file_id": fid,
-                    "file_name": from_name,
-                    "operation": "delete",
-                    "old_hash": from_hash or (from_item.new_hash if from_item else None),
-                    "old_location": from_entry.get("location", "") if isinstance(from_entry, dict) else None,
-                    "new_hash": None,
-                    "new_location": None,
-                })
+                diff.append(
+                    {
+                        "file_id": fid,
+                        "file_name": from_name,
+                        "operation": "delete",
+                        "old_hash": from_hash or (from_item.new_hash if from_item else None),
+                        "old_location": from_entry.get("location", "") if isinstance(from_entry, dict) else None,
+                        "new_hash": None,
+                        "new_location": None,
+                    }
+                )
 
             elif from_entry is None and to_entry is not None:
                 # Present in to, absent in from → added
-                diff.append({
-                    "file_id": fid,
-                    "file_name": to_name,
-                    "operation": "add",
-                    "old_hash": None,
-                    "old_location": None,
-                    "new_hash": to_hash or (to_item.new_hash if to_item else None),
-                    "new_location": to_entry.get("location", "") if isinstance(to_entry, dict) else None,
-                })
+                diff.append(
+                    {
+                        "file_id": fid,
+                        "file_name": to_name,
+                        "operation": "add",
+                        "old_hash": None,
+                        "old_location": None,
+                        "new_hash": to_hash or (to_item.new_hash if to_item else None),
+                        "new_location": to_entry.get("location", "") if isinstance(to_entry, dict) else None,
+                    }
+                )
 
             else:
                 # Both exist — check for changes
@@ -403,15 +550,17 @@ class FileCommitService(CommonService):
                 if changed:
                     old_loc = from_entry.get("location", "") if isinstance(from_entry, dict) else None
                     new_loc = to_entry.get("location", "") if isinstance(to_entry, dict) else None
-                    diff.append({
-                        "file_id": fid,
-                        "file_name": to_name or from_name,
-                        "operation": operation,
-                        "old_hash": from_hash or (from_item.new_hash if from_item else None),
-                        "old_location": old_loc or (from_item.new_location if from_item else None),
-                        "new_hash": to_hash or (to_item.new_hash if to_item else None),
-                        "new_location": new_loc or (to_item.new_location if to_item else None),
-                    })
+                    diff.append(
+                        {
+                            "file_id": fid,
+                            "file_name": to_name or from_name,
+                            "operation": operation,
+                            "old_hash": from_hash or (from_item.new_hash if from_item else None),
+                            "old_location": old_loc or (from_item.new_location if from_item else None),
+                            "new_hash": to_hash or (to_item.new_hash if to_item else None),
+                            "new_location": new_loc or (to_item.new_location if to_item else None),
+                        }
+                    )
 
         return diff
 
@@ -449,27 +598,33 @@ class FileCommitService(CommonService):
                 live_hash = _compute_file_hash(folder_id, fid)
                 committed_hash = committed_entry.get("hash", "")
                 if live_hash and live_hash != committed_hash:
-                    changes.append({
-                        "file_id": fid,
-                        "file_name": committed_entry.get("name", ""),
-                        "operation": "modify",
-                    })
+                    changes.append(
+                        {
+                            "file_id": fid,
+                            "file_name": committed_entry.get("name", ""),
+                            "operation": "modify",
+                        }
+                    )
             else:
                 if FileService.get_or_none(id=fid) is None:
-                    changes.append({
-                        "file_id": fid,
-                        "file_name": committed_entry.get("name", ""),
-                        "operation": "delete",
-                    })
+                    changes.append(
+                        {
+                            "file_id": fid,
+                            "file_name": committed_entry.get("name", ""),
+                            "operation": "delete",
+                        }
+                    )
 
         # Check for newly added files
         for fid, live_file in current_files.items():
             if fid not in processed:
-                changes.append({
-                    "file_id": fid,
-                    "file_name": live_file.name,
-                    "operation": "add",
-                })
+                changes.append(
+                    {
+                        "file_id": fid,
+                        "file_name": live_file.name,
+                        "operation": "add",
+                    }
+                )
 
         return changes
 
@@ -520,10 +675,14 @@ class FileCommitService(CommonService):
         visited = set()
         while current_id and current_id not in visited:
             visited.add(current_id)
-            item = FileCommitItem.select().where(
-                FileCommitItem.commit_id == current_id,
-                FileCommitItem.file_id == file_id,
-            ).first()
+            item = (
+                FileCommitItem.select()
+                .where(
+                    FileCommitItem.commit_id == current_id,
+                    FileCommitItem.file_id == file_id,
+                )
+                .first()
+            )
             if item and item.new_hash:
                 obj_path = f".objects/{item.new_hash}"
                 storage_impl = settings.STORAGE_IMPL
@@ -538,6 +697,215 @@ class FileCommitService(CommonService):
 
         return None
 
+    # ------------------------------------------------------------------
+    # Artifact-page commit surface
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def record_page_edit(
+        cls,
+        *,
+        tenant_id: str,
+        kb_id: str,
+        page_type: str,
+        slug: str,
+        content_before: str,
+        content_after: str,
+        title: Optional[str] = None,
+        comments: Optional[str] = None,
+        user_id: Optional[str] = None,
+    ) -> Optional[str]:
+        """Persist one artifact-page edit as a FileCommit + FileCommitItem.
+
+        Returns the new commit id, or ``None`` when the diff is empty
+        (no-op save, skipped per the v1 contract) or the insert fails.
+
+        Bypasses :func:`create_commit` because artifact commits have no
+        real ``File`` row backing them and don't participate in the
+        workspace ``tree_state`` snapshot chain.
+        """
+        diff_text = _unified_diff(content_before or "", content_after or "", slug)
+        if not diff_text:
+            return None
+
+        title_ts = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+        final_title = (title or "").strip() or f"{title_ts} {slug}"
+        commit_id = get_uuid()
+        item_id = get_uuid()
+        file_id = _artifact_file_id(kb_id, slug)
+        now_ts = current_timestamp()
+        now_dt = datetime_format(date_time=datetime.datetime.now())
+
+        # Persist the post-save markdown per the configured storage.
+        # A failure here logs but doesn't block the commit row — the diff
+        # is still meaningful without content_after.
+        storage_kind, location = _store_content_after(kb_id, content_after or "")
+
+        # Chain to the newest existing commit for this page. NOTE: this lookup
+        # runs before the transaction, so racing writers can share a parent.
+        parent = (
+            FileCommit.select(FileCommit.id)
+            .join(
+                FileCommitItem,
+                on=(FileCommitItem.commit_id == FileCommit.id),
+            )
+            .where((FileCommit.folder_id == kb_id) & (FileCommitItem.file_id == file_id))
+            .order_by(FileCommit.create_time.desc())
+            .first()
+        )
+        parent_id = parent.id if parent else None
+
+        try:
+            with DB.atomic():
+                FileCommit(
+                    id=commit_id,
+                    folder_id=kb_id,
+                    parent_id=parent_id,
+                    # ``message`` stays populated with the same string as
+                    # ``title`` so any generic file-commit consumer still
+                    # renders something sensible.
+                    message=final_title[:512],
+                    author_id=user_id or "",
+                    file_count=1,
+                    tree_state=None,
+                    title=final_title[:255],
+                    comments=comments or "",
+                    create_time=now_ts,
+                    create_date=now_dt,
+                    update_time=now_ts,
+                    update_date=now_dt,
+                ).save(force_insert=True)
+
+                FileCommitItem(
+                    id=item_id,
+                    commit_id=commit_id,
+                    file_id=file_id,
+                    operation="modify" if content_before else "add",
+                    diff=diff_text,
+                    content_after_storage=storage_kind or None,
+                    content_after_location=location or None,
+                    slug_kwd=slug,
+                    page_type_kwd=page_type,
+                    create_time=now_ts,
+                    create_date=now_dt,
+                    update_time=now_ts,
+                    update_date=now_dt,
+                ).save(force_insert=True)
+        except Exception:
+            logging.exception(
+                "record_page_edit: insert failed for kb=%s slug=%s",
+                kb_id,
+                slug,
+            )
+            return None
+
+        return commit_id
+
+    @classmethod
+    @DB.connection_context()
+    def list_page_commits(
+        cls,
+        tenant_id: str,
+        kb_id: str,
+        slug: str,
+        page: int = 1,
+        page_size: int = 50,
+    ) -> tuple[int, list[dict]]:
+        """Return (total, items) for one artifact page's history.
+
+        Filters on folder, artifact file id and ``FileCommitItem.slug_kwd``;
+        nicknames come from a second ``User`` query rather than a join.
+        Heavy columns (``diff``, content-after) are left to the detail path.
+        """
+        page = max(int(page or 1), 1)
+        page_size = max(min(int(page_size or 50), 200), 1)  # clamp to 1..200
+        file_id = _artifact_file_id(kb_id, slug)
+
+        base = (
+            FileCommit.select(
+                FileCommit.id,
+                FileCommit.title,
+                FileCommit.comments,
+                FileCommit.author_id,
+                FileCommit.create_time,
+                FileCommit.create_date,
+            )
+            .join(FileCommitItem, on=(FileCommitItem.commit_id == FileCommit.id))
+            .where((FileCommit.folder_id == kb_id) & (FileCommitItem.file_id == file_id) & (FileCommitItem.slug_kwd == slug))
+        )
+        total = base.count()
+        rows = list(base.order_by(FileCommit.create_time.desc()).paginate(page, page_size).dicts())
+        # Rename ``author_id`` to ``user_id`` to preserve the previous response key.
+        for r in rows:
+            r["user_id"] = r.pop("author_id", None)
+
+        user_ids = {r["user_id"] for r in rows if r.get("user_id")}
+        nickname_by_id: dict[str, str] = {}
+        if user_ids:
+            try:
+                for u in User.select(User.id, User.nickname).where(User.id.in_(list(user_ids))).dicts():
+                    nickname_by_id[u["id"]] = u.get("nickname") or ""
+            except Exception:  # best-effort: on failure rows get an empty nickname
+                logging.exception(
+                    "list_page_commits: nickname lookup failed",
+                )
+        for r in rows:
+            r["user_nickname"] = nickname_by_id.get(r.get("user_id") or "", "")
+        return total, rows
+
+    @classmethod
+    @DB.connection_context()
+    def get_page_commit_detail(
+        cls,
+        tenant_id: str,
+        kb_id: str,
+        commit_id: str,
+    ) -> Optional[dict]:
+        """Return one artifact commit with ``diff`` and ``content_after``
+        (resolved from storage), or ``None`` when the commit or its item
+        is missing. Scoped by ``folder_id == kb_id``; ``tenant_id`` is only
+        echoed back, so kb access must be checked by the caller.
+        """
+        commit = FileCommit.get_or_none(
+            (FileCommit.id == commit_id) & (FileCommit.folder_id == kb_id),
+        )
+        if commit is None:
+            return None
+        item = FileCommitItem.get_or_none(FileCommitItem.commit_id == commit_id)
+        if item is None:
+            return None
+
+        content_after = _read_content_after(
+            kb_id,
+            item.content_after_storage or "",
+            item.content_after_location or "",
+        )
+
+        nickname = ""
+        if commit.author_id:
+            try:
+                u = User.get_or_none(User.id == commit.author_id)
+                if u is not None:
+                    nickname = u.nickname or ""
+            except Exception:
+                logging.exception("get_page_commit_detail: nickname lookup failed")
+
+        return {
+            "id": commit.id,
+            "tenant_id": tenant_id,
+            "kb_id": kb_id,
+            "page_type_kwd": item.page_type_kwd,
+            "slug": item.slug_kwd,
+            "user_id": commit.author_id or None,
+            "user_nickname": nickname,
+            "title": commit.title,
+            "comments": commit.comments,
+            "diff": item.diff,
+            "content_after": content_after,
+            "create_time": commit.create_time,
+            "create_date": commit.create_date,
+        }
+
     @classmethod
     @DB.connection_context()
     def get_file_version_history(cls, file_id):
@@ -545,20 +913,21 @@ class FileCommitService(CommonService):
 
         Returns list of dicts: [{"commit_id", "operation", "hash", "create_time", "message"}]
         """
-        items = FileCommitItem.select().where(FileCommitItem.file_id == file_id).order_by(
-            FileCommitItem.create_time.desc())
+        items = FileCommitItem.select().where(FileCommitItem.file_id == file_id).order_by(FileCommitItem.create_time.desc())
 
         versions = []
         for item in items:
             commit = cls.get_commit(item.commit_id)
             if commit:
-                versions.append({
-                    "commit_id": item.commit_id,
-                    "operation": item.operation,
-                    "hash": item.new_hash or item.old_hash or "",
-                    "create_time": item.create_time,
-                    "message": commit.message,
-                })
+                versions.append(
+                    {
+                        "commit_id": item.commit_id,
+                        "operation": item.operation,
+                        "hash": item.new_hash or item.old_hash or "",
+                        "create_time": item.create_time,
+                        "message": commit.message,
+                    }
+                )
 
         return versions
 
@@ -620,9 +989,7 @@ def _build_hierarchical_tree(tree_state, root_folder_id):
         }
         # File children
         for fid, entry in files_by_parent.get(node_id, []):
-            fn = {"id": fid, "name": entry.get("name", fid), "type": "file",
-                  "hash": entry.get("hash", ""), "size": entry.get("size", 0),
-                  "status": entry.get("status", "1")}
+            fn = {"id": fid, "name": entry.get("name", fid), "type": "file", "hash": entry.get("hash", ""), "size": entry.get("size", 0), "status": entry.get("status", "1")}
             if entry.get("location"):
                 fn["location"] = entry["location"]
             node["children"].append(fn)
diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py
index d6bb9e1db1..779669f5c0 100644
--- a/api/db/services/knowledgebase_service.py
+++ b/api/db/services/knowledgebase_service.py
@@ -46,6 +46,7 @@ class KnowledgebaseService(CommonService):
     Attributes:
         model: The Knowledgebase model class for database operations.
     """
+
     model = Knowledgebase
 
     @classmethod
@@ -59,13 +60,7 @@ class KnowledgebaseService(CommonService):
         - KBs owned by the current user (`tenant_id == user_id`)
         Always constrained to `StatusEnum.VALID`.
         """
-        return (
-            (
-                (cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == TenantPermission.TEAM.value))
-                | (cls.model.tenant_id == user_id)
-            )
-            & (cls.model.status == StatusEnum.VALID.value)
-        )
+        return ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.tenant_id == user_id)) & (cls.model.status == StatusEnum.VALID.value)
 
     @classmethod
     @DB.connection_context()
@@ -94,8 +89,7 @@ class KnowledgebaseService(CommonService):
                 2. The user is not the creator of the dataset
         """
         # Check if a dataset can be deleted by a user
-        docs = cls.model.select(
-            cls.model.id).where(cls.model.id == kb_id, cls.model.created_by == user_id).paginate(0, 1)
+        docs = cls.model.select(cls.model.id).where(cls.model.id == kb_id, cls.model.created_by == user_id).paginate(0, 1)
         docs = docs.dicts()
         if not docs:
             return False
@@ -127,10 +121,10 @@ class KnowledgebaseService(CommonService):
         # Check parsing status of each document
         for doc in docs:
             # If document is being parsed, don't allow chat creation
-            if doc['run'] == TaskStatus.RUNNING.value or doc['run'] == TaskStatus.CANCEL.value or doc['run'] == TaskStatus.FAIL.value:
+            if doc["run"] == TaskStatus.RUNNING.value or doc["run"] == TaskStatus.CANCEL.value or doc["run"] == TaskStatus.FAIL.value:
                 return False, f"Document '{doc['name']}' in dataset '{kb.name}' is still being parsed. Please wait until all documents are parsed before starting a chat."
             # If document is not yet parsed and has no chunks, don't allow chat creation
-            if doc['run'] == TaskStatus.UNSTART.value and doc['chunk_num'] == 0:
+            if doc["run"] == TaskStatus.UNSTART.value and doc["chunk_num"] == 0:
                 return False, f"Document '{doc['name']}' in dataset '{kb.name}' has not been parsed yet. Please parse all documents before starting a chat."
 
         return True, None
@@ -143,20 +137,14 @@ class KnowledgebaseService(CommonService):
         #     kb_ids: List of dataset IDs
         # Returns:
         #     List of document IDs
-        doc_ids = cls.model.select(Document.id.alias("document_id")).join(Document, on=(cls.model.id == Document.kb_id)).where(
-            cls.model.id.in_(kb_ids)
-        )
+        doc_ids = cls.model.select(Document.id.alias("document_id")).join(Document, on=(cls.model.id == Document.kb_id)).where(cls.model.id.in_(kb_ids))
         doc_ids = list(doc_ids.dicts())
         doc_ids = [doc["document_id"] for doc in doc_ids]
         return doc_ids
 
     @classmethod
     @DB.connection_context()
-    def get_by_tenant_ids(cls, joined_tenant_ids, user_id,
-                          page_number, items_per_page,
-                          orderby, desc, keywords,
-                          parser_id=None
-                          ):
+    def get_by_tenant_ids(cls, joined_tenant_ids, user_id, page_number, items_per_page, orderby, desc, keywords, parser_id=None):
         # Get knowledge bases by tenant IDs with pagination and filtering
         # Args:
         #     joined_tenant_ids: List of tenant IDs
@@ -183,17 +171,25 @@ class KnowledgebaseService(CommonService):
             cls.model.parser_id,
             cls.model.embd_id,
             User.nickname,
-            User.avatar.alias('tenant_avatar'),
-            cls.model.update_time
+            User.avatar.alias("tenant_avatar"),
+            cls.model.update_time,
         ]
         if keywords:
-            kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where(
-                cls._visibility_and_status_filter(joined_tenant_ids, user_id),
-                fn.LOWER(cls.model.name).contains(keywords.lower()),
+            kbs = (
+                cls.model.select(*fields)
+                .join(User, on=(cls.model.tenant_id == User.id))
+                .where(
+                    cls._visibility_and_status_filter(joined_tenant_ids, user_id),
+                    fn.LOWER(cls.model.name).contains(keywords.lower()),
+                )
             )
         else:
-            kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where(
-                cls._visibility_and_status_filter(joined_tenant_ids, user_id),
+            kbs = (
+                cls.model.select(*fields)
+                .join(User, on=(cls.model.tenant_id == User.id))
+                .where(
+                    cls._visibility_and_status_filter(joined_tenant_ids, user_id),
+                )
             )
         if parser_id:
             kbs = kbs.where(cls.model.parser_id == parser_id)
@@ -223,7 +219,7 @@ class KnowledgebaseService(CommonService):
             cls.model.chunk_num,
             cls.model.status,
             cls.model.create_date,
-            cls.model.update_date
+            cls.model.update_date,
         ]
         # find team kb and owned kb
         kbs = cls.model.select(*fields).where(cls._visibility_and_status_filter(tenant_ids, user_id))
@@ -287,15 +283,19 @@ class KnowledgebaseService(CommonService):
             cls.model.raptor_task_finish_at,
             cls.model.mindmap_task_id,
             cls.model.mindmap_task_finish_at,
+            cls.model.artifact_task_id,
+            cls.model.artifact_task_finish_at,
+            cls.model.skill_task_id,
+            cls.model.skill_task_finish_at,
             cls.model.create_time,
-            cls.model.update_time
-            ]
-        kbs = cls.model.select(*fields)\
-                .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
-            .where(
-            (cls.model.id == kb_id),
-            (cls.model.status == StatusEnum.VALID.value)
-        ).dicts()
+            cls.model.update_time,
+        ]
+        kbs = (
+            cls.model.select(*fields)
+            .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)
+            .where((cls.model.id == kb_id), (cls.model.status == StatusEnum.VALID.value))
+            .dicts()
+        )
         if not kbs:
             return None
         return kbs[0]
@@ -362,11 +362,7 @@ class KnowledgebaseService(CommonService):
         #     tenant_id: Tenant ID
         # Returns:
         #     Tuple of (exists, knowledge_base)
-        kb = cls.model.select().where(
-            (cls.model.name == kb_name)
-            & (cls.model.tenant_id == tenant_id)
-            & (cls.model.status == StatusEnum.VALID.value)
-        )
+        kb = cls.model.select().where((cls.model.name == kb_name) & (cls.model.tenant_id == tenant_id) & (cls.model.status == StatusEnum.VALID.value))
         if kb:
             return True, kb[0]
         return False, None
@@ -379,17 +375,9 @@ class KnowledgebaseService(CommonService):
         #     List of all dataset IDs
         return [m["id"] for m in cls.model.select(cls.model.id).dicts()]
 
-
     @classmethod
     @DB.connection_context()
-    def create_with_name(
-        cls,
-        *,
-        name: str,
-        tenant_id: str,
-        parser_id: str | None = None,
-        **kwargs
-    ):
+    def create_with_name(cls, *, name: str, tenant_id: str, parser_id: str | None = None, **kwargs):
         """Create a dataset (knowledgebase) by name with kb_app defaults.
 
         This encapsulates the creation logic used in kb_app.create so other callers
@@ -429,7 +417,7 @@ class KnowledgebaseService(CommonService):
             "tenant_id": tenant_id,
             "created_by": tenant_id,
             "parser_id": (parser_id or "naive"),
-            **kwargs # Includes optional fields such as description, language, permission, avatar, parser_config, etc.
+            **kwargs,  # Includes optional fields such as description, language, permission, avatar, parser_config, etc.
         }
 
         # Update parser_config (always override with validated default/merged config)
@@ -438,11 +426,9 @@ class KnowledgebaseService(CommonService):
 
         return True, payload
 
-
     @classmethod
     @DB.connection_context()
-    def get_list(cls, joined_tenant_ids, user_id,
-                 page_number, items_per_page, orderby, desc, id, name, keywords, parser_id=None):
+    def get_list(cls, joined_tenant_ids, user_id, page_number, items_per_page, orderby, desc, id, name, keywords, parser_id=None):
         # Get list of knowledge bases with filtering and pagination
         # Args:
         #     joined_tenant_ids: List of tenant IDs
@@ -566,7 +552,7 @@ class KnowledgebaseService(CommonService):
             kb.save(only=dirty_fields)
         except ValueError as e:
             if str(e) == "no data to save!":
-                pass # that's OK
+                pass  # that's OK
             else:
                 raise e
 
@@ -577,21 +563,17 @@ class KnowledgebaseService(CommonService):
         if not kb_row:
             raise RuntimeError(f"kb_id {kb_id} does not exist")
         update_dict = {
-            'doc_num': kb_row.doc_num - doc_num_info['doc_num'],
-            'chunk_num': kb_row.chunk_num - doc_num_info['chunk_num'],
-            'token_num': kb_row.token_num - doc_num_info['token_num'],
-            'update_time': current_timestamp(),
-            'update_date': datetime_format(datetime.now())
+            "doc_num": kb_row.doc_num - doc_num_info["doc_num"],
+            "chunk_num": kb_row.chunk_num - doc_num_info["chunk_num"],
+            "token_num": kb_row.token_num - doc_num_info["token_num"],
+            "update_time": current_timestamp(),
+            "update_date": datetime_format(datetime.now()),
         }
         return cls.model.update(update_dict).where(cls.model.id == kb_id).execute()
 
     @classmethod
     @DB.connection_context()
     def get_null_tenant_embd_id_row(cls):
-        fields = [
-            cls.model.id,
-            cls.model.tenant_id,
-            cls.model.embd_id
-        ]
+        fields = [cls.model.id, cls.model.tenant_id, cls.model.embd_id]
         objs = cls.model.select(*fields).where(cls.model.tenant_embd_id.is_null())
         return list(objs)
diff --git a/api/db/services/pipeline_operation_log_service.py b/api/db/services/pipeline_operation_log_service.py
index 6c766f859c..12fbef5201 100644
--- a/api/db/services/pipeline_operation_log_service.py
+++ b/api/db/services/pipeline_operation_log_service.py
@@ -98,8 +98,8 @@ class PipelineOperationLogService(CommonService):
         if document_id != GRAPH_RAPTOR_FAKE_DOC_ID:
             referred_document_id = document_id
 
-        # no need to update document for graph rag, raptor mindmap task
-        if task_type not in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP]:
+        # no need to update document for KB-level fan-out tasks
+        if task_type not in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP, PipelineTaskType.ARTIFACT, PipelineTaskType.SKILL]:
             ok, document = DocumentService.get_by_id(referred_document_id)
             if not ok:
                 logging.warning(f"Document for referred_document_id {referred_document_id} not found")
@@ -137,7 +137,7 @@ class PipelineOperationLogService(CommonService):
         if task_type not in VALID_PIPELINE_TASK_TYPES:
             raise ValueError(f"Invalid task type: {task_type}")
 
-        if task_type in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP]:
+        if task_type in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP, PipelineTaskType.ARTIFACT, PipelineTaskType.SKILL]:
             # query task to get progress information from task
             ok, task = TaskService.get_by_id(task_id)
             if not ok:
@@ -166,6 +166,16 @@ class PipelineOperationLogService(CommonService):
                     document.kb_id,
                     {"mindmap_task_finish_at": finish_at},
                 )
+            elif task_type == PipelineTaskType.ARTIFACT:
+                KnowledgebaseService.update_by_id(
+                    document.kb_id,
+                    {"artifact_task_finish_at": finish_at},
+                )
+            elif task_type == PipelineTaskType.SKILL:
+                KnowledgebaseService.update_by_id(
+                    document.kb_id,
+                    {"skill_task_finish_at": finish_at},
+                )
 
         log = dict(
             id=get_uuid(),
diff --git a/api/db/services/task_service.py b/api/db/services/task_service.py
index 8dc03ec8fc..2e02c50f44 100644
--- a/api/db/services/task_service.py
+++ b/api/db/services/task_service.py
@@ -37,7 +37,95 @@ from rag.nlp import search
 
 CANVAS_DEBUG_DOC_ID = "dataflow_x"
 GRAPH_RAPTOR_FAKE_DOC_ID = "graph_raptor_x"
-TASK_MAX_LOG_LENGTH = int(os.environ.get("TASK_MAX_LOG_LENGTH", 3000)) # TEXT MAX is 64 KiB bytes!
+TASK_MAX_LOG_LENGTH = int(os.environ.get("TASK_MAX_LOG_LENGTH", 3000))  # TEXT MAX is 64 KiB bytes!
+DOC_CHUNKING_COUNTER_TTL_SECONDS = 7 * 24 * 3600  # 7 days in seconds; passed as ``exp`` for the chunking Redis keys
+
+
+def _doc_chunking_pending_key(doc_id: str) -> str:
+    return f"doc:chunking_pending:{doc_id}"  # Redis key of the per-document pending-task counter
+
+
+def _doc_chunking_aborted_key(doc_id: str) -> str:
+    return f"doc:chunking_aborted:{doc_id}"  # Redis key of the per-document abort marker
+
+
+def _doc_chunking_done_key(task_id: str) -> str:
+    return f"doc:chunking_done:{task_id}"  # Redis key of the per-task "already credited" marker
+
+
+def seed_doc_chunking_counter(doc_id: str, pending_count: int) -> bool:
+    """Clear the abort marker for ``doc_id`` and store its pending-task count.
+
+    Returns the ``REDIS_CONN.set`` result, or False on bad input or any error.
+    """
+    if not (doc_id and pending_count > 0):
+        return False
+    try:
+        REDIS_CONN.delete(_doc_chunking_aborted_key(doc_id))
+        return REDIS_CONN.set(_doc_chunking_pending_key(doc_id), str(pending_count), exp=DOC_CHUNKING_COUNTER_TTL_SECONDS)
+    except Exception:
+        logging.exception("Failed to seed chunking counter for doc %s", doc_id)
+        return False
+
+
+def clear_doc_chunking_counter(doc_id: str) -> None:
+    if not doc_id:
+        return  # nothing to clear without a doc id
+    try:
+        REDIS_CONN.delete(_doc_chunking_pending_key(doc_id))  # only the pending counter; the abort marker is not touched
+    except Exception:  # best-effort: log and carry on
+        logging.exception("Failed to clear chunking counter for doc %s", doc_id)
+
+
+def abort_doc_chunking_counter(doc_id: str) -> None:
+    """Drop the pending counter for ``doc_id`` and leave an abort marker.
+
+    Best-effort: any exception is logged and swallowed.
+    """
+    if not doc_id:
+        return
+    try:
+        REDIS_CONN.delete(_doc_chunking_pending_key(doc_id))
+        REDIS_CONN.set(_doc_chunking_aborted_key(doc_id), "1", exp=DOC_CHUNKING_COUNTER_TTL_SECONDS)
+    except Exception:
+        logging.exception("Failed to abort chunking counter for doc %s", doc_id)
+
+
+def is_doc_chunking_aborted(doc_id: str) -> bool:
+    if not doc_id:
+        return False
+    try:
+        return bool(REDIS_CONN.get(_doc_chunking_aborted_key(doc_id)))  # any truthy stored value counts as aborted
+    except Exception:
+        logging.exception("Failed to read chunking abort marker for doc %s", doc_id)
+        return False  # a failed read is reported as "not aborted"
+
+
+def credit_doc_chunking_task(doc_id: str, task_id: str) -> int | None:
+    """Credit one completed standard chunking task.
+
+    Returns the post-decrement pending count on first credit, ``1`` when the
+    task was already credited (so retries read as not-last), ``-1`` when the
+    pending counter is missing, and ``None`` on empty ids or any exception.
+    """
+    if not doc_id or not task_id:
+        return None
+    try:
+        first_credit = REDIS_CONN.set_if_absent(  # per-task marker guards against double credit
+            _doc_chunking_done_key(task_id),
+            "1",
+            exp=DOC_CHUNKING_COUNTER_TTL_SECONDS,
+        )
+        if not first_credit:
+            return 1
+        pending_key = _doc_chunking_pending_key(doc_id)
+        if REDIS_CONN.get(pending_key) is None:
+            return -1  # no pending counter to decrement
+        return REDIS_CONN.decrby(pending_key, 1)  # NOTE(review): separate call from the get above, so not atomic
+    except Exception:
+        logging.exception("Failed to credit chunking task %s for doc %s", task_id, doc_id)
+        return None
+
 
 def trim_header_by_lines(text: str, max_length) -> str:
     # Trim header text to maximum length while preserving line breaks
@@ -50,8 +138,8 @@ def trim_header_by_lines(text: str, max_length) -> str:
     if len_text <= max_length:
         return text
     for i in range(len_text):
-        if text[i] == '\n' and len_text - i <= max_length:
-            return text[i + 1:]
+        if text[i] == "\n" and len_text - i <= max_length:
+            return text[i + 1 :]
     return text
 
 
@@ -69,6 +157,7 @@ class TaskService(CommonService):
     Attributes:
         model: The Task model class for database operations.
     """
+
     model = Task
 
     @classmethod
@@ -96,6 +185,7 @@ class TaskService(CommonService):
             cls.model.doc_id,
             cls.model.from_page,
             cls.model.to_page,
+            cls.model.task_type,
             cls.model.retry_count,
             Document.kb_id,
             Document.parser_id,
@@ -116,32 +206,34 @@ class TaskService(CommonService):
         ]
         docs = (
             cls.model.select(*fields)
-                .join(Document, on=(doc_id == Document.id))
-                .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
-                .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
-                .where(cls.model.id == task_id)
+            .join(Document, on=(doc_id == Document.id))
+            .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
+            .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
+            .where(cls.model.id == task_id)
         )
         docs = list(docs.dicts())
         if not docs:
             return None
+        doc = docs[0]
 
         msg = f"\n{datetime.now().strftime('%H:%M:%S')} Task has been received."
         prog = random.random() / 10.0
-        if docs[0]["retry_count"] >= 3:
+        if doc["retry_count"] >= 3:
             msg = "\nERROR: Task is abandoned after 3 times attempts."
             prog = -1
 
         cls.model.update(
             progress_msg=cls.model.progress_msg + msg,
             progress=prog,
-            retry_count=docs[0]["retry_count"] + 1,
-        ).where(cls.model.id == docs[0]["id"]).execute()
+            retry_count=doc["retry_count"] + 1,
+        ).where(cls.model.id == doc["id"]).execute()
 
         if docs[0]["retry_count"] >= 3:
+            abort_doc_chunking_counter(docs[0]["doc_id"])
             DocumentService.update_by_id(docs[0]["doc_id"], {"progress": -1, "run": TaskStatus.FAIL.value, "update_time": current_timestamp(), "update_date": get_format_time()})
             return None
 
-        return docs[0]
+        return doc
 
     @classmethod
     @DB.connection_context()
@@ -165,10 +257,7 @@ class TaskService(CommonService):
             cls.model.digest,
             cls.model.chunk_ids,
         ]
-        tasks = (
-            cls.model.select(*fields).order_by(cls.model.from_page.asc(), cls.model.create_time.desc())
-            .where(cls.model.doc_id == doc_id)
-        )
+        tasks = cls.model.select(*fields).order_by(cls.model.from_page.asc(), cls.model.create_time.desc()).where(cls.model.doc_id == doc_id)
         tasks = list(tasks.dicts())
         if not tasks:
             return None
@@ -189,20 +278,8 @@ class TaskService(CommonService):
             list[dict]: List of task dictionaries containing task details.
                        Returns None if no tasks are found.
         """
-        fields = [
-            cls.model.id,
-            cls.model.doc_id,
-            cls.model.from_page,
-            cls.model.progress,
-            cls.model.progress_msg,
-            cls.model.digest,
-            cls.model.chunk_ids,
-            cls.model.create_time
-        ]
-        tasks = (
-            cls.model.select(*fields).order_by(cls.model.create_time.desc())
-            .where(cls.model.doc_id.in_(doc_ids))
-        )
+        fields = [cls.model.id, cls.model.doc_id, cls.model.from_page, cls.model.progress, cls.model.progress_msg, cls.model.digest, cls.model.chunk_ids, cls.model.create_time]
+        tasks = cls.model.select(*fields).order_by(cls.model.create_time.desc()).where(cls.model.doc_id.in_(doc_ids))
         tasks = list(tasks.dicts())
         if not tasks:
             return None
@@ -238,9 +315,7 @@ class TaskService(CommonService):
         """
         with DB.lock("get_task", -1):
             docs = (
-                cls.model.select(
-                    *[Document.id, Document.kb_id, Document.location, File.parent_id]
-                )
+                cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id])
                 .join(Document, on=(cls.model.doc_id == Document.id))
                 .join(
                     File2Document,
@@ -326,11 +401,7 @@ class TaskService(CommonService):
                 cls.model.update(progress_msg=progress_msg).where(cls.model.id == id).execute()
             if "progress" in info:
                 prog = info["progress"]
-                cls.model.update(progress=prog).where(
-                    (cls.model.id == id) &
-                    ((prog >= 1) | ((cls.model.progress != -1) &
-                    ((prog == -1) | (prog > cls.model.progress))))
-                ).execute()
+                cls.model.update(progress=prog).where((cls.model.id == id) & ((prog >= 1) | ((cls.model.progress != -1) & ((prog == -1) | (prog > cls.model.progress))))).execute()
         else:
             with DB.lock("update_progress", -1):
                 if info["progress_msg"]:
@@ -338,11 +409,7 @@ class TaskService(CommonService):
                     cls.model.update(progress_msg=progress_msg).where(cls.model.id == id).execute()
                 if "progress" in info:
                     prog = info["progress"]
-                    cls.model.update(progress=prog).where(
-                        (cls.model.id == id) &
-                        ((prog >= 1) | ((cls.model.progress != -1) &
-                        ((prog == -1) | (prog > cls.model.progress))))
-                    ).execute()
+                    cls.model.update(progress=prog).where((cls.model.id == id) & ((prog >= 1) | ((cls.model.progress != -1) & ((prog == -1) | (prog > cls.model.progress))))).execute()
 
         begin_at = task.begin_at
         if begin_at is not None:
@@ -456,18 +523,22 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
             if pre_task["chunk_ids"]:
                 pre_chunk_ids.extend(pre_task["chunk_ids"].split())
         if pre_chunk_ids:
-            settings.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]),
-                                         chunking_config["kb_id"])
+            settings.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]), chunking_config["kb_id"])
     DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num})
 
     bulk_insert_into_db(Task, parse_task_array, True)
     DocumentService.begin2parse(doc["id"])
 
     unfinished_task_array = [task for task in parse_task_array if task["progress"] < 1.0]
-    for unfinished_task in unfinished_task_array:
-        assert REDIS_CONN.queue_product(
-            settings.get_svr_queue_name(priority, suffix), message=unfinished_task
-        ), "Can't access Redis. Please check the Redis' status."
+    chunking_n = sum(1 for task in unfinished_task_array if not task.get("task_type"))
+    if chunking_n > 0:
+        assert seed_doc_chunking_counter(doc["id"], chunking_n), "Can't access Redis. Please check the Redis' status."
+    try:
+        for unfinished_task in unfinished_task_array:
+            assert REDIS_CONN.queue_product(settings.get_svr_queue_name(priority, suffix), message=unfinished_task), "Can't access Redis. Please check the Redis' status."
+    except Exception:
+        abort_doc_chunking_counter(doc["id"])
+        raise
 
 
 def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config: dict):
@@ -494,8 +565,7 @@ def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config:
     idx = 0
     while idx < len(prev_tasks):
         prev_task = prev_tasks[idx]
-        if prev_task.get("from_page", 0) == task.get("from_page", 0) \
-                and prev_task.get("digest", 0) == task.get("digest", ""):
+        if prev_task.get("from_page", 0) == task.get("from_page", 0) and prev_task.get("digest", 0) == task.get("digest", ""):
             break
         idx += 1
 
@@ -506,18 +576,22 @@ def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config:
         return 0
     task["chunk_ids"] = prev_task["chunk_ids"]
     task["progress"] = 1.0
-    if "from_page" in task and "to_page" in task and (int(task['to_page']) - int(task['from_page']) >= 10 ** 6 or (int(task['from_page']) == MAXIMUM_TASK_PAGE_NUMBER and int(task['to_page']) == MAXIMUM_TASK_PAGE_NUMBER)):
+    if (
+        "from_page" in task
+        and "to_page" in task
+        and (int(task["to_page"]) - int(task["from_page"]) >= 10**6 or (int(task["from_page"]) == MAXIMUM_TASK_PAGE_NUMBER and int(task["to_page"]) == MAXIMUM_TASK_PAGE_NUMBER))
+    ):
         task["progress_msg"] = f"Page({task['from_page']}~{task['to_page']}): "
     else:
         task["progress_msg"] = ""
-    task["progress_msg"] = " ".join(
-        [datetime.now().strftime("%H:%M:%S"), task["progress_msg"], "Reused previous task's chunks."])
+    task["progress_msg"] = " ".join([datetime.now().strftime("%H:%M:%S"), task["progress_msg"], "Reused previous task's chunks."])
     prev_task["chunk_ids"] = ""
 
     return len(task["chunk_ids"].split())
 
 
 def cancel_all_task_of(doc_id):
+    abort_doc_chunking_counter(doc_id)
     for t in TaskService.query(doc_id=doc_id):
         try:
             REDIS_CONN.set(f"{t.id}-cancel", "x")
@@ -535,7 +609,7 @@ def has_canceled(task_id):
     return False
 
 
-def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DEBUG_DOC_ID, file:dict=None, priority: int=0, rerun:bool=False) -> tuple[bool, str]:
+def queue_dataflow(tenant_id: str, flow_id: str, task_id: str, doc_id: str = CANVAS_DEBUG_DOC_ID, file: dict = None, priority: int = 0, rerun: bool = False) -> tuple[bool, str]:
 
     task = dict(
         id=task_id,
@@ -544,7 +618,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
         to_page=MAXIMUM_TASK_PAGE_NUMBER,
         task_type="dataflow" if not rerun else "dataflow_rerun",
         priority=priority,
-        begin_at= datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        begin_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     )
     if doc_id not in [CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID]:
         TaskService.model.delete().where(TaskService.model.doc_id == doc_id).execute()
@@ -556,9 +630,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
     task["dataflow_id"] = flow_id
     task["file"] = file
 
-    if not REDIS_CONN.queue_product(
-            settings.get_svr_queue_name(priority, "common"), message=task
-    ):
+    if not REDIS_CONN.queue_product(settings.get_svr_queue_name(priority, "common"), message=task):
         return False, "Can't access Redis. Please check the Redis' status."
 
     return True, ""
diff --git a/api/utils/validation_utils.py b/api/utils/validation_utils.py
index c2fc205dc3..d20ae4fbbe 100644
--- a/api/utils/validation_utils.py
+++ b/api/utils/validation_utils.py
@@ -423,6 +423,7 @@ class ParserConfig(Base):
     filename_embd_weight: Annotated[float | None, Field(default=0.1, ge=0.0, le=1.0)]
     task_page_size: Annotated[int | None, Field(default=None, ge=1)]
     pages: Annotated[list[list[int]] | None, Field(default=None)]
+    compilation_template_group_id: Annotated[list[str], Field(default_factory=list)]
     ext: Annotated[dict, Field(default={})]
     # Table parser: column name -> "indexing" | "metadata" | "both". Absence => all columns "both".
     # Table parser: "auto" = all columns both (default), "manual" = use table_column_roles. None → treated as "auto".
@@ -444,6 +445,25 @@ class ParserConfig(Base):
             out[k] = "indexing" if val == "vectorize" else val
         return out
 
+    @field_validator("compilation_template_group_id", mode="before")
+    @classmethod
+    def normalize_compilation_template_group_ids(cls, v: Any) -> Any:
+        """Normalize the raw value into a list of unique, stripped group ids.
+
+        A bare string counts as a one-element list. ``None`` and any other
+        non-list value yield ``[]``. Non-string items and items that are
+        empty after stripping are dropped; the first occurrence of each id
+        keeps its position.
+        """
+        candidates = [v] if isinstance(v, str) else v
+        if not isinstance(candidates, list):
+            return []
+        stripped = (item.strip() for item in candidates if isinstance(item, str))
+        # dict keys are insertion-ordered, so fromkeys() de-duplicates while
+        # preserving first-seen order.
+        unique = dict.fromkeys(group_id for group_id in stripped if group_id)
+        return list(unique)
+
 
 class UpdateDocumentReq(Base):
     """
@@ -563,7 +583,7 @@ class CreateDatasetReq(Base):
             CreateDatasetReq(avatar="data:video/mp4;base64,...")  # Unsupported MIME type
             ```
         """
-        if not v: # cover both None and empty string
+        if not v:  # cover both None and empty string
             return v
 
         if "," in v:
diff --git a/common/constants.py b/common/constants.py
index 87c9ff3f2b..17feeb3a4b 100644
--- a/common/constants.py
+++ b/common/constants.py
@@ -169,9 +169,11 @@ class PipelineTaskType(StrEnum):
     GRAPH_RAG = "GraphRAG"
     MINDMAP = "Mindmap"
     MEMORY = "Memory"
+    ARTIFACT = "Artifact"
+    SKILL = "Skill"
 
 
-VALID_PIPELINE_TASK_TYPES = {PipelineTaskType.PARSE, PipelineTaskType.DOWNLOAD, PipelineTaskType.RAPTOR, PipelineTaskType.GRAPH_RAG, PipelineTaskType.MINDMAP}
+VALID_PIPELINE_TASK_TYPES = {PipelineTaskType.PARSE, PipelineTaskType.DOWNLOAD, PipelineTaskType.RAPTOR, PipelineTaskType.GRAPH_RAG, PipelineTaskType.MINDMAP, PipelineTaskType.ARTIFACT, PipelineTaskType.SKILL}
 
 
 class MCPServerType(StrEnum):
diff --git a/conf/infinity_mapping.json b/conf/infinity_mapping.json
index 893e18632e..dfc07d2e44 100644
--- a/conf/infinity_mapping.json
+++ b/conf/infinity_mapping.json
@@ -40,5 +40,37 @@
 	"toc_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
 	"raptor_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
 	"raptor_layer_int": {"type": "integer", "default": 0},
-	"extra": {"type": "varchar", "default": ""}
+	"extra": {"type": "varchar", "default": ""},
+
+	"compile_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"source_chunk_ids": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"source_doc_ids": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"compilation_template_ids": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"compilation_template_kind_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"chunk_hash_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"input_hash_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"artifact_slug_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"md_with_weight": {"type": "varchar", "default": ""},
+	"summary_with_weight": {"type": "varchar", "default": ""},
+	"skill_with_weight": {"type": "varchar", "default": ""},
+	"skill_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"children_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"doc_ids_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"slug_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"title_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"page_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"entity_names_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"outlinks_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"related_kb_pages_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"from_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"to_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"rechunk_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"rechunked_from_template_id": {"type": "varchar", "default": ""},
+	"rechunked_from_chunk_ids": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"superseded_by_chunk_id": {"type": "varchar", "default": ""},
+	"doc_count_int": {"type": "integer", "default": 0},
+	"depth_int": {"type": "integer", "default": 0},
+	"outlinks_int": {"type": "integer", "default": 0},
+	"token_num": {"type": "integer", "default": 0}
 }
diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py
index 47d02513c7..028edc8df3 100644
--- a/deepdoc/parser/pdf_parser.py
+++ b/deepdoc/parser/pdf_parser.py
@@ -701,11 +701,11 @@ class RAGFlowPdfParser:
             logging.info(f"Added {added} OCR results from rotated table {table_index}")
 
     def __ocr(self, pagenum, img, chars, ZM=3, device_id: int | None = None):
-        start = timer()
+        # start = timer()
         bxs = self.ocr.detect(np.array(img), device_id)
-        logging.info(f"__ocr detecting boxes of an image cost ({timer() - start}s)")
+        # logging.info(f"__ocr detecting boxes of an image cost ({timer() - start}s)")
 
-        start = timer()
+        # start = timer()
         if not bxs:
             self.boxes.append([])
             return
@@ -771,8 +771,8 @@ class RAGFlowPdfParser:
                 )
                 b["text"] = ""
 
-        logging.info(f"__ocr sorting {len(chars)} chars cost {timer() - start}s")
-        start = timer()
+        # logging.info(f"__ocr sorting {len(chars)} chars cost {timer() - start}s")
+        # start = timer()
         boxes_to_reg = []
         img_np = None
         for b in bxs:
@@ -787,7 +787,7 @@ class RAGFlowPdfParser:
         for i in range(len(boxes_to_reg)):
             boxes_to_reg[i]["text"] = texts[i]
             del boxes_to_reg[i]["box_image"]
-        logging.info(f"__ocr recognize {len(bxs)} boxes cost {timer() - start}s")
+        # logging.info(f"__ocr recognize {len(bxs)} boxes cost {timer() - start}s")
         bxs = [b for b in bxs if b["text"]]
         if self.mean_height[pagenum - 1] == 0:
             self.mean_height[pagenum - 1] = np.median([b["bottom"] - b["top"] for b in bxs])
diff --git a/rag/advanced_rag/knowlege_compile/__init__.py b/rag/advanced_rag/knowlege_compile/__init__.py
new file mode 100644
index 0000000000..2a848ac2f1
--- /dev/null
+++ b/rag/advanced_rag/knowlege_compile/__init__.py
@@ -0,0 +1,43 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+from .structure import compile_structure_from_text, merge_compiled_structures
+from .wiki import (
+    WIKI_DRAFT_COMPILE_KWD,
+    WIKI_MAP_COMPILE_KWD,
+    WIKI_PAGE_COMPILE_KWD,
+    WIKI_PLAN_COMPILE_KWD,
+    WIKI_REDUCE_COMPILE_KWD,
+    wiki_map_from_chunks,
+    wiki_plan_from_reduction,
+    wiki_reduce_from_extracts,
+    wiki_refine_from_plan,
+)
+
+
+__all__ = [
+    "compile_structure_from_text",
+    "merge_compiled_structures",
+    "wiki_map_from_chunks",
+    "wiki_reduce_from_extracts",
+    "wiki_plan_from_reduction",
+    "wiki_refine_from_plan",
+    "WIKI_MAP_COMPILE_KWD",
+    "WIKI_REDUCE_COMPILE_KWD",
+    "WIKI_PLAN_COMPILE_KWD",
+    "WIKI_PAGE_COMPILE_KWD",
+    "WIKI_DRAFT_COMPILE_KWD",
+]
diff --git a/rag/advanced_rag/knowlege_compile/_common.py b/rag/advanced_rag/knowlege_compile/_common.py
new file mode 100644
index 0000000000..83e833a37c
--- /dev/null
+++ b/rag/advanced_rag/knowlege_compile/_common.py
@@ -0,0 +1,913 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+"""Shared helpers for the knowlege_compile pipelines (structure + wiki).
+
+Both ``structure.py`` (compile_structure_from_text / merge_compiled_structures)
+and ``wiki.py`` (the MAP→REDUCE→PLAN→REFINE artifact pipeline) need the same set
+of plumbing: encode-through-LLMBundle, stable id minting, search-tokenizer
+pairs, order-preserving chunk-id unions, defensive LLMBundle validation, the
+``chat_mdl.max_length * INPUT_UTILIZATION - prompt_overhead`` token-budget
+calculation, and thin ES I/O wrappers.
+
+Anything in this module is meant to be:
+  - LLMBundle-aware but provider-agnostic;
+  - Safe to import from either pipeline without circular references;
+  - Synchronous unless an awaitable behaviour is required.
+
+Heavier shared logic that is conceptually identical but happens to differ in
+shape between the two pipelines (e.g. pairwise-cosine dedup, LLM "are these
+the same?" batching) intentionally stays in each pipeline file for now —
+extract those only when their shapes converge.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import string
+from typing import Any, Awaitable, Callable, Iterable, Optional
+
+import xxhash
+
+from common.misc_utils import thread_pool_exec
+from common.token_utils import num_tokens_from_string
+from rag.nlp import rag_tokenizer
+from rag.prompts.generator import INPUT_UTILIZATION, gen_json, split_chunks
+
+
+# ---------------------------------------------------------------------------
+# ID minting
+# ---------------------------------------------------------------------------
+
+
+def stable_row_id(*parts) -> str:
+    """Derive a deterministic row id by hashing the colon-joined ``parts``.
+
+    Each part is rendered with ``str()`` (``None`` renders as ``""``); the
+    joined key is hashed with xxh64 and returned as a hex digest.
+    """
+    rendered = [str(part) if part is not None else "" for part in parts]
+    return xxhash.xxh64(":".join(rendered).encode("utf-8", "surrogatepass")).hexdigest()
+
+
+# ---------------------------------------------------------------------------
+# Embedding
+# ---------------------------------------------------------------------------
+
+
+async def encode(embd_mdl, texts: list[str]) -> list:
+    """Run ``embd_mdl.encode(texts)`` via ``thread_pool_exec`` and return a list.
+
+    Only the first element of the ``encode`` result is kept; the second (the
+    ``used_tokens`` count) is discarded. Falsy ``texts`` returns ``[]``
+    without calling the model. ``embd_mdl`` itself is not validated here.
+    """
+    if texts:
+        vectors, _used_tokens = await thread_pool_exec(embd_mdl.encode, texts)
+        return list(vectors)
+    return []
+
+
+# ---------------------------------------------------------------------------
+# Tokenization for keyword search
+# ---------------------------------------------------------------------------
+
+
+def tokenize_for_search(text: str) -> tuple[str, str]:
+    """Return the ``(content_ltks, content_sm_ltks)`` pair for ``text``.
+
+    ``("", "")`` is returned when ``text`` is not a non-empty string or when
+    the first tokenizer pass yields nothing; the fine-grained pass is skipped then.
+    """
+    empty_pair = ("", "")
+    if not (isinstance(text, str) and text):
+        return empty_pair
+    coarse = rag_tokenizer.tokenize(text)
+    if coarse:
+        return coarse, rag_tokenizer.fine_grained_tokenize(coarse)
+    return empty_pair
+
+
+# ---------------------------------------------------------------------------
+# Order-preserving union of string lists
+# ---------------------------------------------------------------------------
+
+
+def union_ordered(*lists: Optional[Iterable]) -> list[str]:
+    """Merge iterables into one duplicate-free list, keeping first-seen order.
+
+    Falsy inputs (``None``, empty containers) are skipped whole; within an
+    input, falsy items and non-``str`` items are ignored.
+    """
+    # dict keys are insertion-ordered, so a single dict serves as both the
+    # membership test and the ordered result.
+    ordered: dict[str, None] = {}
+    for group in lists:
+        if not group:
+            continue
+        for item in group:
+            # Truthiness is tested before the type, as items may be any object.
+            if item and isinstance(item, str):
+                ordered.setdefault(item, None)
+    return list(ordered)
+
+
+# ---------------------------------------------------------------------------
+# Token-budget calculation for split_chunks
+# ---------------------------------------------------------------------------
+
+
+def make_input_budget(
+    chat_mdl,
+    *prompts: str,
+    floor: int = 1024,
+    utilization: float = INPUT_UTILIZATION,
+) -> int:
+    """Compute how many input tokens remain once the fixed prompts are paid for.
+
+    The usable window is ``int(chat_mdl.max_length * utilization)``. The token
+    count of all ``prompts`` concatenated (falsy entries count as ``""``) is
+    subtracted from it, and the result is never allowed below ``floor``.
+    """
+    # Token cost of the constant scaffolding; falsy prompts contribute nothing.
+    overhead = num_tokens_from_string("".join(prompt if prompt else "" for prompt in prompts))
+    window = int(chat_mdl.max_length * utilization)
+    remaining = window - overhead
+    return max(remaining, floor)
+
+
+# ---------------------------------------------------------------------------
+# Defensive LLMBundle validation
+# ---------------------------------------------------------------------------
+
+
+def ensure_llm_bundle(mdl, method: str, *, label: str = "model"):
+    """Resolve ``mdl`` to an object exposing ``method``, or ``None``.
+
+    Resolution order:
+      1. ``mdl`` itself, when it has an attribute named ``method``;
+      2. ``mdl[0]``, when ``mdl`` is a non-empty tuple whose first element has
+         one — a warning is logged, since this usually means the return
+         value of ``method()`` was passed in place of the bundle;
+      3. ``None`` otherwise, after logging an error.
+
+    ``label`` only names the model in those log messages.
+    """
+    if hasattr(mdl, method):
+        return mdl
+    type_name = type(mdl).__name__
+    # Tuple inputs are tolerated only when their first element qualifies.
+    unwrappable = isinstance(mdl, tuple) and mdl and hasattr(mdl[0], method)
+    if not unwrappable:
+        logging.error("%s has no .%s method (type=%s); aborting", label, method, type_name)
+        return None
+    # Keep the pipeline running on the unwrapped element while flagging the call site.
+    logging.warning(
+        "%s arrived as a %s; unwrapping to first element (check the call site — was %s()'s return value passed instead of the LLMBundle?)",
+        label,
+        type_name,
+        method,
+    )
+    return mdl[0]
+
+
+# ---------------------------------------------------------------------------
+# ES I/O wrappers
+# ---------------------------------------------------------------------------
+
+
+async def es_search(
+    select_fields: list[str],
+    condition: dict,
+    *,
+    tenant_id: str,
+    kb_ids: list[str],
+    match_expressions: list | None = None,
+    offset: int = 0,
+    limit: int = 1000,
+    label: str = "es_search",
+) -> dict:
+    """Search the tenant's doc-store index and return ``{row_id: row_dict}``.
+
+    The blocking ``search`` call runs via ``thread_pool_exec``. Failures are
+    logged with ``label`` and reported as ``{}``, as is a falsy result.
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    try:
+        search_args = (
+            select_fields,
+            [],
+            condition,
+            match_expressions or [],
+            OrderByExpr(),
+            offset,
+            limit,
+            index,
+            kb_ids,
+        )
+        raw = await thread_pool_exec(settings.docStoreConn.search, *search_args)
+        rows = settings.docStoreConn.get_fields(raw, select_fields)
+        return rows if rows else {}
+    except Exception:
+        logging.exception("%s failed (condition=%r)", label, condition)
+        return {}
+
+
+async def es_insert(
+    rows: list[dict],
+    tenant_id: str,
+    kb_id: str,
+    *,
+    label: str = "es_insert",
+) -> None:
+    """Insert ``rows`` into the tenant's index via ``thread_pool_exec``; a falsy ``rows`` is a no-op.
+    A failed insert is logged (``label`` plus the row count) and swallowed."""
+    if not rows:
+        return
+    from common import settings
+    from rag.nlp import search as _rag_search
+    target_index = _rag_search.index_name(tenant_id)
+    try:
+        await thread_pool_exec(settings.docStoreConn.insert, rows, target_index, kb_id)
+    except Exception:
+        logging.exception("%s failed (%d row(s))", label, len(rows))
+
+
+async def es_delete(
+    condition: dict,
+    tenant_id: str,
+    kb_id: str,
+    *,
+    label: str = "es_delete",
+) -> None:
+    """Best-effort delete of rows matching ``condition``, run via ``thread_pool_exec``.
+    A failed delete is not propagated; it is debug-logged together with the exception."""
+    from common import settings
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    try:
+        await thread_pool_exec(settings.docStoreConn.delete, condition, index, kb_id)
+    except Exception:
+        logging.debug("%s failed (condition=%r); caller may rely on id-upsert", label, condition, exc_info=True)
+
+
+async def es_upsert_one(
+    filter_condition: dict,
+    row: dict,
+    tenant_id: str,
+    kb_id: str,
+    *,
+    label: str = "es_upsert_one",
+) -> None:
+    """Replace the rows matching ``filter_condition`` with the single ``row``.
+
+    Implemented as :func:`es_delete` followed by :func:`es_insert`; the two
+    steps are not atomic, and each logs under ``label`` with a ``.delete`` /
+    ``.insert`` suffix. The original author advises giving ``row["id"]`` a
+    value from :func:`stable_row_id` so id-based dedup covers a missed delete.
+    """
+    delete_label, insert_label = f"{label}.delete", f"{label}.insert"
+    await es_delete(filter_condition, tenant_id, kb_id, label=delete_label)
+    await es_insert([row], tenant_id, kb_id, label=insert_label)
+
+
+# ---------------------------------------------------------------------------
+# Doc-vector field discovery
+# ---------------------------------------------------------------------------
+
+
+def find_vec_field(doc: dict) -> tuple[Optional[str], Optional[list]]:
+    """Return the first ``(key, value)`` of ``doc`` whose key is a string that starts
+    with ``q_`` and ends with ``_vec``, or ``(None, None)`` when no key matches."""
+    vec_items = (
+        (name, value) for name, value in doc.items()
+        if isinstance(name, str) and name.startswith("q_") and name.endswith("_vec")
+    )
+    return next(vec_items, (None, None))
+
+
+# ---------------------------------------------------------------------------
+# Chunked-LLM pipeline engine
+# ---------------------------------------------------------------------------
+#
+# Both artifact MAP and compile_structure_from_text follow the same outer shape:
+#
+#   1. Filter chunks (drop empty text, optionally skip a "resume" set);
+#   2. Pack remaining chunks into batches via ``split_chunks`` sized to leave
+#      room for the prompt scaffolding;
+#   3. Run an LLM-driven ``process_batch`` over each batch in parallel under
+#      an ``asyncio.Semaphore(max_workers)``;
+#   4. Aggregate the per-batch results into a single value.
+#
+# The inner LLM call shape diverges between the pipelines — artifact uses a
+# single ``gen_json`` per batch with ``[CHUNK_ID Cn]``-labelled bodies,
+# structure uses two ``gen_json`` calls (nodes then edges) with ``---``
+# separators and no per-chunk attribution. That divergence lives in each
+# pipeline's ``process_batch`` closure; this engine only owns the scaffold.
+
+
+def _default_chunk_text(chunk: dict) -> str:
+    """Return the first truthy of ``text`` / ``content_with_weight`` / ``content``; ``""`` if it is not a ``str`` or ``chunk`` is not a dict."""
+    keys = ("text", "content_with_weight", "content")
+    body = next((val for key in keys if (val := chunk.get(key))), "") if isinstance(chunk, dict) else ""
+    return body if isinstance(body, str) else ""
+
+
+def _default_label(position_in_batch: int) -> str:
+    return "C" + str(position_in_batch + 1)  # 1-based label: position 0 -> "C1"
+
+
+def build_chunk_batches(
+    chunks: list[dict],
+    chat_mdl,
+    *,
+    prompt_overhead_tokens: int,
+    resume_chunk_ids: Optional[set[str]] = None,
+    scrub_text: Optional[Callable[[str], str]] = None,
+    label_fn: Callable[[int], str] = _default_label,
+    chunk_text_picker: Optional[Callable[[dict], str]] = None,
+    budget_floor: int = 1024,
+    batch_size_cap: Optional[int] = None,
+    window_fraction: Optional[float] = None,
+) -> tuple[list[list[dict]], dict]:
+    """Filter chunks, pack into batches, return per-batch entries.
+
+    Each batch entry is ``{"label": str, "chunk_id": str, "text": str}``
+    where ``label`` is per-batch positional (default ``C1``, ``C2``, …) and
+    ``text`` is the post-scrub chunk body. Chunks that are empty, lack an
+    id (counted as ``skipped_empty``), or are resume-skipped are dropped.
+
+    Two packing modes:
+      - **Default (split_chunks)**: ``input_budget`` derived from
+        ``chat_mdl.max_length * INPUT_UTILIZATION - prompt_overhead_tokens``.
+        Used by ``structure.py`` and the legacy artifact MAP path.
+      - **Cap+fraction (greedy)**: when ``batch_size_cap`` is provided,
+        chunks are packed greedily with two cutoffs — chunk-count exceeds
+        ``batch_size_cap`` OR accumulated tokens exceed
+        ``chat_mdl.max_length * window_fraction``. This is the artifact
+        compilation rule (BS=8, window=0.5).
+
+    Returns ``(batches, info)`` where ``info`` is a small stats dict.
+    """
+    if not chunks:
+        return [], {"total": 0, "kept": 0, "skipped_resume": 0, "skipped_empty": 0, "input_budget": 0, "n_batches": 0}
+
+    picker = chunk_text_picker or _default_chunk_text
+    resume_set = resume_chunk_ids or set()
+
+    chunk_ids: list[str] = []
+    chunk_texts: list[str] = []
+    skipped_resume = 0
+    skipped_empty = 0
+
+    for chunk in chunks:
+        cid = chunk.get("id") or chunk.get("chunk_id")
+        if not cid:
+            skipped_empty += 1
+            continue
+        if cid in resume_set:
+            skipped_resume += 1
+            continue
+        text = picker(chunk)
+        if not text or not text.strip():
+            skipped_empty += 1
+            continue
+        if scrub_text is not None:
+            text = scrub_text(text)
+            if not text or not text.strip():
+                skipped_empty += 1
+                continue
+        chunk_ids.append(cid)
+        chunk_texts.append(text)
+
+    if not chunk_texts:
+        return [], {
+            "total": len(chunks),
+            "kept": 0,
+            "skipped_resume": skipped_resume,
+            "skipped_empty": skipped_empty,
+            "input_budget": 0,
+            "n_batches": 0,
+        }
+
+    batches: list[list[dict]] = []
+    input_budget: int
+
+    if batch_size_cap is not None:
+        # Artifact mode — greedy packing; only a NON-EMPTY batch is ever flushed.
+        fraction = window_fraction if window_fraction is not None else 0.5
+        token_cap = max(int(chat_mdl.max_length * fraction), budget_floor)
+        input_budget = token_cap
+
+        current: list[dict] = []
+        current_tks = 0
+        for idx, text in enumerate(chunk_texts):
+            tks = num_tokens_from_string(text)
+            would_overflow_count = bool(current) and len(current) >= batch_size_cap
+            would_overflow_tokens = bool(current) and (current_tks + tks > token_cap)
+            if would_overflow_count or would_overflow_tokens:
+                batches.append(current)
+                current = []
+                current_tks = 0
+            current.append(
+                {
+                    "label": label_fn(len(current)),
+                    "chunk_id": chunk_ids[idx],
+                    "text": text,
+                }
+            )
+            current_tks += tks
+        if current:
+            batches.append(current)
+    else:
+        input_budget = max(
+            int(chat_mdl.max_length * INPUT_UTILIZATION) - prompt_overhead_tokens,
+            budget_floor,
+        )
+
+        raw_batches = split_chunks(chunk_texts, input_budget) or []
+        for batch in raw_batches:
+            packed: list[dict] = []
+            for position, item in enumerate(batch):
+                for idx, text in item.items():
+                    packed.append(
+                        {
+                            "label": label_fn(position),
+                            "chunk_id": chunk_ids[idx],
+                            "text": text,
+                        }
+                    )
+            if packed:
+                batches.append(packed)
+
+    info = {
+        "total": len(chunks),
+        "kept": len(chunk_texts),
+        "skipped_resume": skipped_resume,
+        "skipped_empty": skipped_empty,
+        "input_budget": input_budget,
+        "n_batches": len(batches),
+    }
+    return batches, info
+
+
+async def run_chunked_pipeline(
+    batches: list[list[dict]],
+    *,
+    process_batch: Callable[..., Awaitable[Any]],
+    aggregate: Optional[Callable[[list[Any]], Any]] = None,
+    max_workers: int = 6,
+    callback: Optional[Callable] = None,
+    log_prefix: str = "chunked_pipeline",
+) -> Any:
+    """Fan ``process_batch`` out over every non-empty batch concurrently.
+
+    Each call is ``await process_batch(entries, batch_idx, total)``;
+    ``batch_idx`` is the position in ``batches`` and ``total`` is
+    ``len(batches)``. At most ``max_workers`` calls run at once (a falsy or
+    non-positive value lifts the limit). Returns ``aggregate(results)``
+    when ``aggregate`` is given, else the results list in batch order.
+
+    If a call raises, the remaining tasks are cancelled and awaited, then
+    the exception propagates. ``callback`` fires once after success.
+    """
+    def _finish(values: list) -> Any:
+        return aggregate(values) if aggregate else values
+
+    if not batches:
+        return _finish([])
+
+    total = len(batches)
+    limiter = asyncio.Semaphore(max_workers) if max_workers and max_workers > 0 else None
+
+    async def _guarded(batch_idx: int, entries: list[dict]) -> Any:
+        if limiter is None:
+            return await process_batch(entries, batch_idx, total)
+        async with limiter:
+            return await process_batch(entries, batch_idx, total)
+
+    pending = [asyncio.create_task(_guarded(pos, entries)) for pos, entries in enumerate(batches) if entries]
+    if not pending:
+        return _finish([])
+
+    try:
+        outcomes = await asyncio.gather(*pending, return_exceptions=False)
+    except Exception:
+        for task in pending:
+            task.cancel()
+        await asyncio.gather(*pending, return_exceptions=True)
+        raise
+
+    if callback:
+        try:
+            callback(1.0, f"{log_prefix}: {total} batch(es) complete")
+        except Exception:
+            logging.debug("%s: completion callback failed", log_prefix, exc_info=True)
+
+    return _finish(outcomes)
+
+
+# ---------------------------------------------------------------------------
+# Bulk dedup engine — exact + embedding + LLM disambiguation
+# ---------------------------------------------------------------------------
+#
+# Replaces wiki's _wiki_exact_dedup_entities / _wiki_exact_dedup_concepts /
+# _wiki_embedding_dedup_entities / _wiki_resolve_ambiguous_entities /
+# _wiki_apply_merges with one parameterised engine. structure.py's
+# merge_compiled_structures uses a different algorithm (incremental
+# kept-set + per-pair LLM judgement) and stays as-is.
+
+
+_PUNCT_TABLE = str.maketrans("", "", string.punctuation)
+
+
+DEFAULT_DISAMBIGUATE_SYSTEM = "You are a named-entity resolution assistant. Return only JSON."
+
+
+def normalize_key(name) -> str:
+    """Lowercase, drop ASCII punctuation, then trim outer whitespace (the
+    exact-dedup bucket key). Non-str input normalizes to ``""``."""
+    if not isinstance(name, str):
+        return ""
+    return name.lower().translate(_PUNCT_TABLE).strip()
+
+
+def _exact_dedup_by_key(
+    items: list[dict],
+    *,
+    name_key: str,
+    type_key: Optional[str] = None,
+    aggregate_extra: Optional[Callable[[list[dict]], dict]] = None,
+) -> list[dict]:
+    """Bucket ``items`` by ``(normalize_key(name), type)`` and merge each bucket.
+
+    Non-dict items and items whose normalized name is empty are dropped.
+    Buckets come out in first-seen order; each merged record carries:
+      - ``<name_key>``: the bucket's most frequent spelling (first seen
+        wins a tie)
+      - ``<type_key>`` (only when given): the bucket's type value
+      - ``aliases``: sorted set of every spelling and input alias, minus
+        the chosen spelling
+      - ``mention_count``: total of the inputs' ``mention_count`` (a
+        missing or falsy value counts as ``1``)
+      - ``chunk_ids``: ``union_ordered`` over the inputs' ``chunk_ids``
+      - ``_norm``: the normalized name
+      - the dict returned by ``aggregate_extra(bucket)``, if any
+    """
+    buckets: dict[tuple, list[dict]] = {}
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+        norm_name = normalize_key(item.get(name_key, ""))
+        if norm_name:
+            bucket_key = (norm_name, item.get(type_key) if type_key else None)
+            buckets.setdefault(bucket_key, []).append(item)
+
+    merged: list[dict] = []
+    for (norm_name, type_value), bucket in buckets.items():
+        # Tally the non-empty string spellings; max() keeps the first on ties.
+        spellings = [member.get(name_key, "") for member in bucket]
+        spellings = [s for s in spellings if isinstance(s, str) and s]
+        tally: dict[str, int] = {}
+        for s in spellings:
+            tally[s] = tally.get(s, 0) + 1
+        chosen = max(tally, key=tally.get) if tally else ""
+
+        # Fold every member's aliases, chunk ids and mention counts together.
+        alias_pool: set[str] = set(spellings)
+        id_lists: list[list] = []
+        mentions = 0
+        for member in bucket:
+            for alias in member.get("aliases") or []:
+                if isinstance(alias, str) and alias:
+                    alias_pool.add(alias)
+            id_lists.append(member.get("chunk_ids") or [])
+            mentions += int(member.get("mention_count") or 1)
+        alias_pool.discard(chosen)
+
+        record: dict = {
+            name_key: chosen,
+            "aliases": sorted(alias_pool),
+            "mention_count": mentions,
+            "chunk_ids": union_ordered(*id_lists),
+            "_norm": norm_name,
+        }
+        if type_key:
+            record[type_key] = type_value
+        if aggregate_extra is not None:
+            try:
+                extras = aggregate_extra(bucket) or {}
+                if isinstance(extras, dict):
+                    record.update(extras)
+            except Exception:
+                logging.exception("bulk_dedup: aggregate_extra failed for group %r", norm_name)
+        merged.append(record)
+
+    return merged
+
+
+async def _embedding_dedup(
+    canonical: list[dict],
+    embd_mdl,
+    *,
+    name_key: str,
+    type_key: Optional[str] = None,
+    merge_threshold: float = 0.90,
+    ambiguous_low: float = 0.75,
+) -> tuple[dict[int, int], list[tuple[int, int]], Optional[list]]:
+    """Vectorised pairwise cosine; same-type-only when ``type_key`` given.
+
+    Returns ``(merged_into, ambiguous_pairs, vectors)``. ``merged_into``
+    maps ``index → parent_index``. ``ambiguous_pairs`` is the [ambiguous_low,
+    merge_threshold) bucket minus pairs already linked by auto-merges.
+    ``vectors`` is ``None`` when encoding raises or returns the wrong count
+    (caller should skip dedup); fewer than two items yields ``({}, [], [])``.
+    """
+    n = len(canonical)
+    if n <= 1:
+        return {}, [], []  # nothing to compare; note vectors is [] here, not None
+
+    names = [it.get(name_key, "") for it in canonical]
+    try:
+        vectors = await encode(embd_mdl, names)
+    except Exception:
+        logging.exception("bulk_dedup: embedding batch failed")
+        return {}, [], None
+    if vectors is None or len(vectors) != n:
+        return {}, [], None  # one vector per name is required to index sims[i, j]
+
+    try:
+        from sklearn.metrics.pairwise import cosine_similarity
+        import numpy as np
+
+        matrix = np.asarray([list(v) for v in vectors], dtype=float)
+        sims = cosine_similarity(matrix)
+    except Exception:
+        logging.exception("bulk_dedup: pairwise cosine failed; skipping")
+        return {}, [], vectors  # vectors is not None, so the caller applies an empty merge map
+
+    merged_into: dict[int, int] = {}
+
+    def _root(i: int) -> int:
+        # Follow parent links until an index with no parent is reached.
+        while i in merged_into:
+            i = merged_into[i]
+        return i
+
+    auto_pairs: list[tuple[int, int]] = []
+    ambiguous_pairs: list[tuple[int, int]] = []
+
+    # Classify every unordered pair (i < j) before any merge is applied.
+    for i in range(n):
+        for j in range(i + 1, n):
+            if type_key and canonical[i].get(type_key) != canonical[j].get(type_key):
+                continue
+            s = float(sims[i, j])
+            if s >= merge_threshold:
+                auto_pairs.append((i, j))
+            elif s >= ambiguous_low:
+                ambiguous_pairs.append((i, j))
+
+    for i, j in auto_pairs:
+        ri, rj = _root(i), _root(j)
+        if ri == rj:
+            continue
+        # The root with the larger mention_count becomes the parent; ties keep ri.
+        if canonical[ri].get("mention_count", 0) >= canonical[rj].get("mention_count", 0):
+            merged_into[rj] = ri
+        else:
+            merged_into[ri] = rj
+
+    still_ambiguous = [(i, j) for i, j in ambiguous_pairs if _root(i) != _root(j)]
+    return merged_into, still_ambiguous, vectors
+
+
+async def _resolve_ambiguous_pairs(
+    canonical: list[dict],
+    ambiguous_pairs: list[tuple[int, int]],
+    merged_into: dict[int, int],
+    chat_mdl,
+    *,
+    name_key: str,
+    type_key: Optional[str] = None,
+    batch_size: int = 50,
+    llm_timeout: int = 60,
+    system_prompt: str = DEFAULT_DISAMBIGUATE_SYSTEM,
+) -> dict[int, int]:
+    """LLM-judged disambiguation in batches; returns updated ``merged_into``.
+
+    Each batch asks for one JSON boolean per still-unlinked pair; a batch
+    that times out, raises, or yields no list is logged and skipped.
+    ``merged_into`` is mutated in place (higher ``mention_count`` root wins).
+    """
+    if not ambiguous_pairs:
+        return merged_into
+
+    def _root(i: int) -> int:
+        while i in merged_into:
+            i = merged_into[i]
+        return i
+
+    for start in range(0, len(ambiguous_pairs), batch_size):
+        batch = [(i, j) for i, j in ambiguous_pairs[start : start + batch_size] if _root(i) != _root(j)]
+        if not batch:
+            continue
+
+        lines: list[str] = []
+        for k, (i, j) in enumerate(batch):
+            a_type = f" ({canonical[i].get(type_key, '')})" if type_key else ""
+            b_type = f" ({canonical[j].get(type_key, '')})" if type_key else ""
+            lines.append(f'{k + 1}. "{canonical[i].get(name_key, "")}"{a_type} vs "{canonical[j].get(name_key, "")}"{b_type}')
+
+        user_prompt = (
+            "For each pair below, determine if they refer to the same real-world entity.\n"
+            f"Return a JSON array of exactly {len(batch)} booleans "
+            "(true = same entity, false = different).\n"
+            "Return ONLY the JSON array.\n\n" + "\n".join(lines)
+        )
+
+        try:
+            res = await asyncio.wait_for(
+                gen_json(system_prompt, user_prompt, chat_mdl, gen_conf={"temperature": 0.0}),
+                timeout=llm_timeout,
+            )
+        except asyncio.TimeoutError:
+            logging.warning("bulk_dedup: disambiguation timed out (%d pairs)", len(batch))
+            continue
+        except Exception:
+            logging.exception("bulk_dedup: disambiguation call failed (%d pairs)", len(batch))
+            continue
+
+        decisions = res if isinstance(res, list) else None  # bare list, or first list inside a dict
+        if isinstance(res, dict):
+            decisions = next((v for v in res.values() if isinstance(v, list)), None)
+        if not isinstance(decisions, list):
+            logging.warning("bulk_dedup: disambiguation returned unexpected shape: %r", type(res))
+            continue
+
+        for k, (i, j) in enumerate(batch):
+            # Strict parse: a string such as "false" is truthy, so plain truthiness would merge.
+            verdict = k < len(decisions) and str(decisions[k]).strip().lower() in ("true", "1", "yes")
+            if not verdict:
+                continue
+            ri, rj = _root(i), _root(j)
+            if ri == rj:
+                continue
+            if canonical[ri].get("mention_count", 0) >= canonical[rj].get("mention_count", 0):
+                merged_into[rj] = ri
+            else:
+                merged_into[ri] = rj
+
+    return merged_into
+
+
+def _apply_dedup_merges(
+    canonical: list[dict],
+    merged_into: dict[int, int],
+    *,
+    name_key: str,
+) -> list[dict]:
+    """Collapse along ``merged_into``: one merged copy per root, emitted in
+    ascending root index (input order) rather than set-iteration order."""
+
+    def _root(i: int) -> int:
+        while i in merged_into:
+            i = merged_into[i]
+        return i
+
+    members: dict[int, list[int]] = {}  # root index -> member indexes, ascending
+    for i in range(len(canonical)):
+        members.setdefault(_root(i), []).append(i)
+    out: list[dict] = []
+    for ri in sorted(members):
+        base = dict(canonical[ri])  # shallow copy: the input record is left untouched
+        aliases: set[str] = set(base.get("aliases") or [])
+        chunk_id_lists: list[list] = [base.get("chunk_ids") or []]
+        mention_count = int(base.get("mention_count") or 0)
+        for it in (canonical[i] for i in members[ri] if i != ri):
+            mention_count += int(it.get("mention_count") or 0)
+            aliases.update(it.get("aliases") or [])
+            n = it.get(name_key)
+            if isinstance(n, str) and n:
+                aliases.add(n)  # a merged member's own name becomes an alias of the root
+            chunk_id_lists.append(it.get("chunk_ids") or [])
+        aliases.discard(base.get(name_key) or "")
+        base["aliases"] = sorted(aliases)
+        base["mention_count"] = mention_count
+        base["chunk_ids"] = union_ordered(*chunk_id_lists)
+        out.append(base)
+    return out
+
+
+async def bulk_dedup_items(
+    items: list[dict],
+    *,
+    name_key: str,
+    type_key: Optional[str] = None,
+    chat_mdl=None,
+    embd_mdl=None,
+    merge_threshold: float = 0.90,
+    ambiguous_low: float = 0.75,
+    ambiguous_batch_size: int = 50,
+    disambiguate_system_prompt: str = DEFAULT_DISAMBIGUATE_SYSTEM,
+    llm_timeout: int = 60,
+    aggregate_extra: Optional[Callable[[list[dict]], dict]] = None,
+    strip_norm_key: bool = True,
+) -> list[dict]:
+    """Deduplicate ``items`` into canonical records in up to three passes.
+
+    Pass 1 — exact (always): ``_exact_dedup_by_key`` buckets the items by
+    ``(normalize(item[name_key]), item.get(type_key))``, summing
+    ``mention_count``, unioning ``aliases`` / ``chunk_ids`` and adding
+    whatever ``aggregate_extra(group)`` returns.
+
+    Pass 2 — embedding (needs ``embd_mdl`` and more than one canonical
+    record): pairwise cosine over the canonical ``name_key`` values.
+    Similarity ≥ ``merge_threshold`` merges outright; similarity in
+    ``[ambiguous_low, merge_threshold)`` is deferred to pass 3. With
+    ``type_key`` set, only same-type pairs are compared. If the embedding
+    step reports failure (``vectors is None``), the pass-1 result is
+    returned unchanged.
+
+    Pass 3 — LLM (needs ``chat_mdl`` and deferred pairs): the pairs are
+    judged in batches of ``ambiguous_batch_size`` via ``gen_json``;
+    positive verdicts are added to the merge map.
+
+    The merge map is then collapsed by ``_apply_dedup_merges``. Unless
+    ``strip_norm_key`` is false, the internal ``_norm`` field is removed
+    from every returned record.
+
+    With both ``chat_mdl`` and ``embd_mdl`` left as ``None`` only pass 1
+    runs (which is what artifact uses for concepts).
+    """
+    def _finish(records: list[dict]) -> list[dict]:
+        if strip_norm_key:
+            for record in records:
+                record.pop("_norm", None)
+        return records
+
+    canonical = _exact_dedup_by_key(items, name_key=name_key, type_key=type_key, aggregate_extra=aggregate_extra)
+    if len(canonical) <= 1 or embd_mdl is None:
+        return _finish(canonical)
+
+    links, unresolved, vectors = await _embedding_dedup(
+        canonical,
+        embd_mdl,
+        name_key=name_key,
+        type_key=type_key,
+        merge_threshold=merge_threshold,
+        ambiguous_low=ambiguous_low,
+    )
+    if vectors is None:
+        logging.warning("bulk_dedup: embedding phase skipped — keeping exact-dedup result")
+        return _finish(canonical)
+
+    if chat_mdl is not None and unresolved:
+        links = await _resolve_ambiguous_pairs(
+            canonical,
+            unresolved,
+            links,
+            chat_mdl,
+            name_key=name_key,
+            type_key=type_key,
+            batch_size=ambiguous_batch_size,
+            llm_timeout=llm_timeout,
+            system_prompt=disambiguate_system_prompt,
+        )
+    return _finish(_apply_dedup_merges(canonical, links, name_key=name_key))
+
+
+__all__ = [
+    "stable_row_id",
+    "encode",
+    "tokenize_for_search",
+    "union_ordered",
+    "make_input_budget",
+    "ensure_llm_bundle",
+    "es_search",
+    "es_insert",
+    "es_delete",
+    "es_upsert_one",
+    "find_vec_field",
+    # Chunked-LLM pipeline engine and bulk dedup engine
+    "normalize_key",
+    "build_chunk_batches",
+    "run_chunked_pipeline",
+    "bulk_dedup_items",
+    "DEFAULT_DISAMBIGUATE_SYSTEM",
+]
diff --git a/rag/advanced_rag/knowlege_compile/dataset_nav.py b/rag/advanced_rag/knowlege_compile/dataset_nav.py
new file mode 100644
index 0000000000..e1551eae7f
--- /dev/null
+++ b/rag/advanced_rag/knowlege_compile/dataset_nav.py
@@ -0,0 +1,437 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+"""Dataset-level navigation markdown for tree-kind compilations.
+
+After a doc finishes a ``tree``-kind compilation template the helper
+``upsert_dataset_nav_doc`` here appends (or refreshes) one line in the
+KB's nav markdown — one line per doc, each line carrying the doc id +
+a short summary lifted from the per-doc tree's root.
+
+Storage: a single ES row per KB under ``compile_kwd="dataset_nav"``,
+``available_int=0`` (so retrievers never surface it). The markdown
+body lives in ``md_with_weight``; ``doc_count_int`` and ``doc_ids_kwd``
+mirror the markdown's order for fast cap-check and dedup.
+
+Concurrency: each read-modify-write runs under a ``RedisDistributedLock``
+keyed by ``f"dataset_nav:{kb_id}"`` so task executors finishing tree
+templates for the same KB in parallel do not interleave. The lock is
+best-effort: if acquiring it raises, the write proceeds lock-free.
+
+The router/retrieval side that *consumes* this markdown is
+intentionally out of scope here.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+from typing import Any, Iterable
+
+import xxhash
+
+from common.token_utils import num_tokens_from_string
+from rag.utils.redis_conn import RedisDistributedLock
+
+
+# Hard cap on the number of docs we record in the nav markdown.
+# New docs beyond this are skipped (listed docs still refresh); removing
+# a doc via ``remove_dataset_nav_doc`` frees a slot.
+MAX_DATASET_NAV_DOCS = 128
+
+# Hard cap on the per-doc summary length, in tokens. Long summaries
+# bloat the markdown and slow downstream LLM passes that ingest the
+# whole nav blob; 128 tokens is enough for 1-2 sentences in either
+# Chinese or English text.
+MAX_DOC_SUMMARY_TOKENS = 128
+
+_COMPILE_KWD = "dataset_nav"  # written to the nav row's ``compile_kwd`` field
+
+# Lock TTL — long enough that an ES round-trip can't expire it mid-write
+# but short enough that a crashed executor doesn't pin the KB.
+_LOCK_TIMEOUT_S = 30  # passed as ``timeout=`` to RedisDistributedLock
+_LOCK_BLOCKING_TIMEOUT_S = 5  # passed as ``blocking_timeout=`` to RedisDistributedLock
+
+
+def _nav_row_id(kb_id: str) -> str:
+    """Deterministic doc-store row id: xxh64 hex digest of ``dataset_nav:<kb_id>``."""
+    seed = f"dataset_nav:{kb_id}".encode("utf-8", "surrogatepass")
+    hasher = xxhash.xxh64(seed)
+    return hasher.hexdigest()
+
+
+def _nav_lock_key(kb_id: str) -> str:
+    return "dataset_nav:{}".format(kb_id)  # per-KB distributed-lock name
+
+
+# Each line of the markdown looks like ``- **<doc_id>**: <summary>``.
+# Group 1 captures ``<doc_id>`` (one or more non-``*`` characters), so an
+# id containing ``*`` never matches and its line is skipped when parsing.
+_LINE_RE = re.compile(r"^- \*\*([^*]+)\*\*:.*$")
+
+
+def _format_line(doc_id: str, summary: str) -> str:
+    # Keep each doc on ONE physical line. The nav markdown is re-read with
+    # ``str.splitlines()``, which also breaks on \v, \f, \x1c-\x1e, \x85,
+    # \u2028 and \u2029 — so flatten every such boundary, not just \n / \r.
+    one_line = " ".join(summary.splitlines()).strip()
+    return f"- **{doc_id}**: {one_line}"
+
+
+def _truncate_summary(text: str) -> str:
+    """Clip ``text`` so it measures at most ``MAX_DOC_SUMMARY_TOKENS`` tokens.
+
+    The text is stripped first and returned whole when it already fits;
+    otherwise a fitting character prefix is located by bisection over
+    ``num_tokens_from_string`` and returned right-stripped. If the tokenizer
+    raises on the full text, a flat 4-characters-per-token cap is used.
+    """
+    if not text:
+        return ""
+    budget = MAX_DOC_SUMMARY_TOKENS
+    text = text.strip()
+    try:
+        whole = num_tokens_from_string(text)
+    except Exception:
+        return text[: budget * 4]
+    if whole <= budget:
+        return text
+
+    def _probe(cut: int) -> int:
+        # Token count of text[:cut]; estimate 4 chars/token if probing fails.
+        try:
+            return num_tokens_from_string(text[:cut])
+        except Exception:
+            return cut // 4
+
+    low, high = 0, len(text)
+    while low < high:
+        cut = (low + high + 1) // 2
+        if _probe(cut) <= budget:
+            low = cut
+        else:
+            high = cut - 1
+    return text[:low].rstrip()
+
+
+def _extract_root_summary_from_tree(tree: dict | None) -> str:
+    """Return the root-level summary text of a tree dict, or ``""``.
+
+    The root's ``title`` is preferred; when it is missing, not a string,
+    or blank, ``summary``, ``content_with_weight`` and ``content`` are
+    tried in that order. The winning value is returned stripped. Only the
+    root dict is inspected — child nodes are never visited. Anything that
+    is not a dict yields ``""``.
+    """
+    if not isinstance(tree, dict):
+        return ""
+    for field in ("title", "summary", "content_with_weight", "content"):
+        candidate = tree.get(field)
+        if not isinstance(candidate, str):
+            continue
+        stripped = candidate.strip()
+        if stripped:
+            return stripped
+    return ""
+
+
+def _parse_existing_lines(md: str) -> list[tuple[str, str]]:
+    """Split nav markdown into ``(doc_id, raw_line)`` pairs, in order.
+
+    Only lines matching ``_LINE_RE`` (``- **<doc_id>**: ...``) are kept,
+    each returned verbatim so a caller can re-emit untouched entries
+    without re-formatting them. Everything else — headers, blank lines,
+    unrecognised shapes — is skipped rather than raising. Empty or
+    ``None`` markdown yields ``[]``.
+    """
+    if not md:
+        return []
+    pairs: list[tuple[str, str]] = []
+    for raw in md.splitlines():
+        hit = _LINE_RE.match(raw)
+        if hit is None:
+            continue
+        pairs.append((hit.group(1), raw))
+    return pairs
+
+
+def _render_md(entries: Iterable[tuple[str, str]]) -> str:
+    return "\n".join([raw_line for _, raw_line in entries])  # ids dropped, lines kept in order
+
+
+def _row_id_field(row: dict | None) -> dict:
+    # Pass a non-empty dict through untouched; everything else becomes {}.
+    usable = bool(row) and isinstance(row, dict)
+    return row if usable else {}
+
+
+async def _get_existing(tenant_id: str, kb_id: str) -> dict | None:
+    """Read the KB's nav row; ``None`` if the index/row is absent OR the read raised."""
+    from common import settings
+    from common.misc_utils import thread_pool_exec
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    if not settings.docStoreConn.index_exist(index, kb_id):  # called inline, outside the try: errors propagate
+        return None
+    try:
+        existing = await thread_pool_exec(
+            settings.docStoreConn.get,
+            _nav_row_id(kb_id),
+            index,
+            [kb_id],
+        )
+    except Exception:
+        logging.exception(
+            "dataset_nav: read failed for kb=%s",
+            kb_id,
+        )
+        return None  # NOTE(review): callers cannot tell a failed read from a missing row
+    return _row_id_field(existing) or None  # empty or non-dict results collapse to None
+
+
+async def _write_row(tenant_id: str, kb_id: str, payload: dict) -> None:
+    """Insert or update the KB's single nav row in the doc store.
+
+    The stored document is ``payload`` layered over fixed bookkeeping
+    fields (``id``, ``kb_id``, ``doc_id``, ``compile_kwd``,
+    ``knowledge_graph_kwd``, ``available_int``); keys present in
+    ``payload`` win. The row is fetched by id first: if found it is
+    updated with every field except ``id``, otherwise it is inserted.
+    Doc-store errors are not caught here.
+    """
+    from common import settings
+    from common.misc_utils import thread_pool_exec
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    row_id = _nav_row_id(kb_id)
+    document = {
+        "id": row_id,
+        "kb_id": kb_id,
+        "doc_id": kb_id,
+        "compile_kwd": _COMPILE_KWD,
+        "knowledge_graph_kwd": "graph",
+        "available_int": 0,
+    }
+    document.update(payload)
+
+    found = await thread_pool_exec(settings.docStoreConn.get, row_id, index, [kb_id])
+    if not found:
+        await thread_pool_exec(settings.docStoreConn.insert, [document], index, kb_id)
+        return
+    changes = {field: value for field, value in document.items() if field != "id"}
+    await thread_pool_exec(
+        settings.docStoreConn.update,
+        {"id": row_id},
+        changes,
+        index,
+        kb_id,
+    )
+
+
+# --------------------------------------------------------------------
+# Public surface
+# --------------------------------------------------------------------
+
+
+async def upsert_dataset_nav_doc(
+    tenant_id: str,
+    kb_id: str,
+    doc_id: str,
+    summary_or_tree: Any,
+) -> None:
+    """Add or refresh a doc's line in the KB's nav markdown.
+
+    ``summary_or_tree`` can be:
+      - a plain string (taken as-is and truncated to ``MAX_DOC_SUMMARY_TOKENS``)
+      - a tree dict (the root summary is extracted via
+        ``_extract_root_summary_from_tree``); any other type is skipped
+
+    The ``MAX_DATASET_NAV_DOCS`` cap is enforced here: a doc not already
+    listed is dropped when the row is full, while listed docs are always
+    refreshed. Lock-acquire, write and lock-release errors are only logged.
+
+    Called from ``_run_tree_templates`` after each successful
+    ``_struct_upsert_graph_json``.
+    """
+    if not doc_id or not kb_id:
+        return
+
+    if isinstance(summary_or_tree, dict):
+        summary = _extract_root_summary_from_tree(summary_or_tree)
+    elif isinstance(summary_or_tree, str):
+        summary = summary_or_tree
+    else:
+        summary = ""
+    summary = _truncate_summary(summary)
+    if not summary:
+        # Nothing to record — a tree with no root summary means the
+        # RAPTOR pass produced a degenerate result; safer to skip than
+        # to write an empty line.
+        logging.info(
+            "dataset_nav: skipping doc=%s (kb=%s) — no usable summary",
+            doc_id,
+            kb_id,
+        )
+        return
+
+    new_line = _format_line(doc_id, summary)
+    lock = RedisDistributedLock(
+        _nav_lock_key(kb_id),
+        timeout=_LOCK_TIMEOUT_S,
+        blocking_timeout=_LOCK_BLOCKING_TIMEOUT_S,
+    )
+    try:
+        await lock.spin_acquire()
+    except Exception:
+        # Best-effort locking: a failed acquire is logged and the update still runs.
+        logging.exception(
+            "dataset_nav: lock acquire failed for kb=%s; proceeding lock-free",
+            kb_id,
+        )
+
+    try:
+        # NOTE(review): _get_existing also returns None when its read fails, in
+        # which case the row is rebuilt below from this single doc only.
+        existing = await _get_existing(tenant_id, kb_id)
+        md = (existing or {}).get("md_with_weight") or ""
+        entries = _parse_existing_lines(md)
+
+        replaced = False
+        for i, (existing_doc_id, _) in enumerate(entries):
+            if existing_doc_id == doc_id:
+                entries[i] = (doc_id, new_line)  # refresh in place, keeping the doc's position
+                replaced = True
+                break
+        if not replaced:
+            if len(entries) >= MAX_DATASET_NAV_DOCS:
+                logging.info(
+                    "dataset_nav: kb=%s already at cap (%d); skipping doc=%s",
+                    kb_id,
+                    MAX_DATASET_NAV_DOCS,
+                    doc_id,
+                )
+                return  # the ``finally`` below still runs lock.release()
+            entries.append((doc_id, new_line))
+
+        payload = {
+            "md_with_weight": _render_md(entries),
+            "doc_count_int": len(entries),
+            "doc_ids_kwd": [doc_id for doc_id, _ in entries],
+        }
+        try:
+            await _write_row(tenant_id, kb_id, payload)
+        except Exception:
+            logging.exception(
+                "dataset_nav: write failed for kb=%s doc=%s",
+                kb_id,
+                doc_id,
+            )
+    finally:
+        try:
+            lock.release()  # also reached when the acquire above failed — TODO confirm release() is safe then
+        except Exception:
+            logging.exception("dataset_nav: lock release failed for kb=%s", kb_id)
+
+async def remove_dataset_nav_doc(
+    tenant_id: str,
+    kb_id: str,
+    doc_id: str,
+) -> None:
+    """Remove ``doc_id``'s line from the KB's nav markdown.
+
+    Called from ``DocumentService.remove_document`` so the markdown
+    stays in sync with the KB's doc set. No-op if the row doesn't
+    exist or the doc isn't represented in it.
+    """
+    if not doc_id or not kb_id:
+        return
+
+    lock = RedisDistributedLock(
+        _nav_lock_key(kb_id),
+        timeout=_LOCK_TIMEOUT_S,
+        blocking_timeout=_LOCK_BLOCKING_TIMEOUT_S,
+    )
+    try:
+        await lock.spin_acquire()
+    except Exception:
+        # Best-effort locking: a failed acquire is logged and the removal still runs.
+        logging.exception(
+            "dataset_nav: lock acquire failed for kb=%s; proceeding lock-free",
+            kb_id,
+        )
+
+    try:
+        existing = await _get_existing(tenant_id, kb_id)
+        if not existing:
+            return  # no nav row (or the read failed): nothing to remove
+        md = existing.get("md_with_weight") or ""
+        entries = _parse_existing_lines(md)
+        before = len(entries)
+        entries = [(d, line) for (d, line) in entries if d != doc_id]
+        if len(entries) == before:
+            return  # doc not listed: skip the write entirely
+
+        payload = {
+            "md_with_weight": _render_md(entries),
+            "doc_count_int": len(entries),
+            "doc_ids_kwd": [d for d, _ in entries],
+        }
+        try:
+            await _write_row(tenant_id, kb_id, payload)
+        except Exception:
+            logging.exception(
+                "dataset_nav: remove-write failed for kb=%s doc=%s",
+                kb_id,
+                doc_id,
+            )
+    finally:
+        try:
+            lock.release()  # also reached when the acquire above failed — TODO confirm release() is safe then
+        except Exception:
+            logging.exception("dataset_nav: lock release failed for kb=%s", kb_id)
+
+
+def remove_dataset_nav_doc_sync(
+    tenant_id: str,
+    kb_id: str,
+    doc_id: str,
+) -> None:
+    """Blocking wrapper around ``remove_dataset_nav_doc`` for sync callers.
+
+    The coroutine always runs on a private, short-lived event loop. When
+    the calling thread already has a loop running (where a nested
+    ``run_until_complete`` raises ``RuntimeError``), that private loop is
+    driven on a one-off worker thread and this call blocks until it is
+    done. Any failure is logged and swallowed: nav cleanup is best-effort.
+    """
+    from concurrent.futures import ThreadPoolExecutor
+
+    def _drive() -> None:
+        loop = asyncio.new_event_loop()
+        try:
+            loop.run_until_complete(remove_dataset_nav_doc(tenant_id, kb_id, doc_id))
+        finally:
+            loop.close()
+
+    try:
+        try:
+            asyncio.get_running_loop()
+        except RuntimeError:
+            return _drive()  # no loop running in this thread: run inline
+        with ThreadPoolExecutor(max_workers=1) as pool:
+            pool.submit(_drive).result()
+    except Exception:
+        logging.exception("dataset_nav: sync remove failed for kb=%s doc=%s", kb_id, doc_id)
diff --git a/rag/graphrag/general/mind_map_extractor.py b/rag/advanced_rag/knowlege_compile/mind_map_extractor.py
similarity index 79%
rename from rag/graphrag/general/mind_map_extractor.py
rename to rag/advanced_rag/knowlege_compile/mind_map_extractor.py
index 354d3d0968..79b0f639d1 100644
--- a/rag/graphrag/general/mind_map_extractor.py
+++ b/rag/advanced_rag/knowlege_compile/mind_map_extractor.py
@@ -33,6 +33,7 @@ from common.token_utils import num_tokens_from_string
 @dataclass
 class MindMapResult:
     """Unipartite Mind Graph result class definition."""
+
     output: dict
 
 
@@ -42,14 +43,12 @@ class MindMapExtractor(Extractor):
     _on_error: ErrorHandlerFn
 
     def __init__(
-            self,
-            llm_invoker: CompletionLLM,
-            prompt: str | None = None,
-            input_text_key: str | None = None,
-            on_error: ErrorHandlerFn | None = None,
+        self,
+        llm_invoker: CompletionLLM,
+        prompt: str | None = None,
+        input_text_key: str | None = None,
+        on_error: ErrorHandlerFn | None = None,
     ):
-        """Init method definition."""
-        # TODO: streamline construction
         self._llm = llm_invoker
         self._input_text_key = input_text_key or "input_text"
         self._mind_map_prompt = prompt or MIND_MAP_EXTRACTION_PROMPT
@@ -70,17 +69,10 @@ class MindMapExtractor(Extractor):
             k = self._key(k)
             if k and k not in keyset:
                 keyset.add(k)
-                arr.append(
-                    {
-                        "id": k,
-                        "children": self._be_children(v, keyset)
-                    }
-                )
+                arr.append({"id": k, "children": self._be_children(v, keyset)})
         return arr
 
-    async def __call__(
-            self, sections: list[str], prompt_variables: dict[str, Any] | None = None
-    ) -> MindMapResult:
+    async def __call__(self, sections: list[str], prompt_variables: dict[str, Any] | None = None) -> MindMapResult:
         """Call method definition."""
         if prompt_variables is None:
             prompt_variables = {}
@@ -93,18 +85,14 @@ class MindMapExtractor(Extractor):
         for i in range(len(sections)):
             section_cnt = num_tokens_from_string(sections[i])
             if cnt + section_cnt >= token_count and texts:
-                tasks.append(asyncio.create_task(
-                    self._process_document("".join(texts), prompt_variables, res)
-                ))
+                tasks.append(asyncio.create_task(self._process_document("".join(texts), prompt_variables, res)))
                 texts = []
                 cnt = 0
 
             texts.append(sections[i])
             cnt += section_cnt
         if texts:
-            tasks.append(asyncio.create_task(
-                self._process_document("".join(texts), prompt_variables, res)
-            ))
+            tasks.append(asyncio.create_task(self._process_document("".join(texts), prompt_variables, res)))
         try:
             await asyncio.gather(*tasks, return_exceptions=False)
         except Exception as e:
@@ -119,16 +107,7 @@ class MindMapExtractor(Extractor):
         if len(merge_json) > 1:
             keys = [re.sub(r"\*+", "", k) for k, v in merge_json.items() if isinstance(v, dict)]
             keyset = set(i for i in keys if i)
-            merge_json = {
-                "id": "root",
-                "children": [
-                    {
-                        "id": self._key(k),
-                        "children": self._be_children(v, keyset)
-                    }
-                    for k, v in merge_json.items() if isinstance(v, dict) and self._key(k)
-                ]
-            }
+            merge_json = {"id": "root", "children": [{"id": self._key(k), "children": self._be_children(v, keyset)} for k, v in merge_json.items() if isinstance(v, dict) and self._key(k)]}
         else:
             k = self._key(list(merge_json.keys())[0])
             merge_json = {"id": k, "children": self._be_children(list(merge_json.items())[0][1], {k})}
@@ -176,9 +155,7 @@ class MindMapExtractor(Extractor):
 
         return self._list_to_kv(to_ret)
 
-    async def _process_document(
-            self, text: str, prompt_variables: dict[str, str], out_res
-    ) -> str:
+    async def _process_document(self, text: str, prompt_variables: dict[str, str], out_res) -> str:
         variables = {
             **prompt_variables,
             self._input_text_key: text,
diff --git a/rag/raptor.py b/rag/advanced_rag/knowlege_compile/raptor.py
similarity index 68%
rename from rag/raptor.py
rename to rag/advanced_rag/knowlege_compile/raptor.py
index d39964f70f..b3b853642e 100644
--- a/rag/raptor.py
+++ b/rag/advanced_rag/knowlege_compile/raptor.py
@@ -19,7 +19,6 @@ import logging
 import re
 
 import numpy as np
-import umap
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.mixture import GaussianMixture
 
@@ -54,6 +53,12 @@ class _PsiTreeNode:
     embedding: np.ndarray | None = None
     children: list["_PsiTreeNode"] = field(default_factory=list)
     parent: "_PsiTreeNode | None" = None
+    # Original (leaf-level) chunk ids that contributed to this node. On
+    # a leaf this is a single-element list with the leaf's own id; on an
+    # internal node it's the order-preserving deduped union of its
+    # children's lists. Carried up through the merge tree so each
+    # produced summary knows which source chunks it covers.
+    source_chunk_ids: list[str] = field(default_factory=list)
 
 
 class _PsiUnionFind:
@@ -150,7 +155,7 @@ class _PsiUnionFind:
     @property
     def tree(self) -> list[int]:
         """Return the compact child-to-parent array for constructed nodes."""
-        return self._tree[:self._next_id]
+        return self._tree[: self._next_id]
 
 
 class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
@@ -209,7 +214,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                 response = re.sub(r"^.*</think>", "", response, flags=re.DOTALL)
                 if response.find("**ERROR**") >= 0:
                     raise Exception(response)
-                await thread_pool_exec(set_llm_cache,self._llm_model.llm_name,system,response,history,gen_conf)
+                await thread_pool_exec(set_llm_cache, self._llm_model.llm_name, system, response, history, gen_conf)
                 return response
             except Exception as exc:
                 last_exc = exc
@@ -305,6 +310,61 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
             labels = np.array([remap[int(lbl)] for lbl in new_labels])
         return labels
 
+    def clustering(self, embeddings, random_state: int, task_id: str = "") -> tuple[int, list[int]]:
+        """Cluster one RAPTOR layer and return ``(n_clusters, labels)``.
+
+        Labels are renumbered to ``0..n_clusters-1``; fewer than three
+        embeddings form one cluster (UMAP's ``n_components`` would be <= 0).
+        """
+        points = np.asarray(embeddings, dtype=np.float64)
+        n_points = len(points)
+        if n_points == 0:
+            return 0, []
+        if n_points <= 2:
+            return 1, [0] * n_points
+
+        # Function-scope import: umap is only needed when clustering runs.
+        import umap
+
+        reduced_embeddings = umap.UMAP(
+            n_neighbors=max(2, int((n_points - 1) ** 0.8)),
+            n_components=min(12, n_points - 2),
+            metric="cosine",
+        ).fit_transform(points)
+        if self._clustering_method == AHC_CLUSTERING_METHOD:
+            logging.info("RAPTOR: using clustering_method=%s before _get_clusters_ahc", self._clustering_method)
+            raw_labels = self._get_clusters_ahc(reduced_embeddings, task_id=task_id)
+            raw_cluster_count = np.unique(raw_labels).size
+            logging.info("RAPTOR AHC: _get_clusters_ahc produced n_clusters=%d", raw_cluster_count)
+            if raw_cluster_count > 1:
+                labels = self._adjust_tree_nodes(reduced_embeddings, raw_labels)
+                logging.info("RAPTOR AHC: _adjust_tree_nodes adjusted n_clusters=%d", np.unique(labels).size)
+            else:
+                labels = raw_labels
+                logging.warning("RAPTOR AHC: _adjust_tree_nodes skipped because _get_clusters_ahc returned one cluster")
+        else:
+            n_clusters = int(self._get_optimal_clusters(reduced_embeddings, random_state, task_id=task_id))
+            if n_clusters <= 1:
+                labels = [0] * n_points
+            else:
+                gm = GaussianMixture(n_components=n_clusters, random_state=random_state)
+                gm.fit(reduced_embeddings)
+                labels = []
+                for prob in gm.predict_proba(reduced_embeddings):
+                    # First component above the threshold, else the most likely one.
+                    candidates = np.where(prob > self._threshold)[0]
+                    labels.append(int(candidates[0]) if len(candidates) else int(np.argmax(prob)))
+
+        # Coerce numpy scalars / arrays to plain ints before renumbering.
+        normalized_labels = [(int(lbl[0]) if len(lbl) else 0) if isinstance(lbl, np.ndarray) else int(lbl) for lbl in labels]
+        if not normalized_labels:
+            return 0, []
+        unique_labels = np.unique(normalized_labels)
+        if len(unique_labels) <= 1:
+            return 1, [0] * len(normalized_labels)
+        label_map = {int(old): idx for idx, old in enumerate(unique_labels)}
+        return len(unique_labels), [label_map[label] for label in normalized_labels]
+
     @timeout(60 * 20)
     async def _summarize_texts(self, texts: list[str], callback=None, task_id: str = ""):
         """Summarize a cluster and return text plus embedding when successful."""
@@ -317,11 +377,11 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                 self._check_task_canceled(task_id, "before LLM call")
 
                 cnt = await self._chat(
-                    "You're a helpful assistant.",
+                    "You're a helpful assistant.\n\nHelp me with the following task.\n\n%s" % self._prompt.format(cluster_content=cluster_content),
                     [
                         {
                             "role": "user",
-                            "content": self._prompt.format(cluster_content=cluster_content),
+                            "content": "Beside the summarization, give a title at the first line of your summarization. Must be in the same language as the paragraphs.",
                         }
                     ],
                     {"max_tokens": max(self._max_token, 512)},  # fix issue:  #10235
@@ -336,7 +396,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                 self._check_task_canceled(task_id, "before embedding")
 
                 embds = await self._embedding_encode(cnt)
-                return cnt, embds
+                return cnt.split("\n")[0], cnt, embds
         except TaskCanceledException:
             raise
         except Exception as exc:
@@ -406,10 +466,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
             split_groups = [group[labels == center_id].tolist() for center_id in range(fanout)]
             split_groups = [bucket for bucket in split_groups if bucket]
             if len(split_groups) <= 1:
-                split_groups = [
-                    group[start:start + self._psi_bucket_size].tolist()
-                    for start in range(0, len(group), self._psi_bucket_size)
-                ]
+                split_groups = [group[start : start + self._psi_bucket_size].tolist() for start in range(0, len(group), self._psi_bucket_size)]
             groups.extend(split_groups)
 
         buckets = [bucket for bucket in buckets if bucket]
@@ -455,7 +512,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                 original_children = len(node.children)
                 grouped_children = []
                 for start in range(0, len(node.children), max_children):
-                    batch = node.children[start:start + max_children]
+                    batch = node.children[start : start + max_children]
                     if len(batch) == 1:
                         grouped_children.append(batch[0])
                         batch[0].parent = node
@@ -561,10 +618,21 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
         return self._build_bucketed_psi_structure(nodes, next_index, task_id)
 
     def _build_psi_structure(self, chunks, task_id: str = "") -> tuple[_PsiTreeNode, list[_PsiTreeNode]]:
-        """Build the Psi merge tree from original chunk embeddings."""
+        """Build the Psi merge tree from original chunk embeddings.
+
+        ``chunks`` entries are read positionally: text at [0], vector
+        at [1] and, when present, ``source_chunk_ids`` at [2] — leaves
+        are seeded with those ids, internal nodes get their ids set
+        during layer materialization in ``_build_psi_layers``.
+        """
         leaves = [
-            _PsiTreeNode(index=i, text=text, embedding=np.asarray(embd))
-            for i, (text, embd) in enumerate(chunks)
+            _PsiTreeNode(
+                index=i,
+                text=item[0],
+                embedding=np.asarray(item[1]),
+                source_chunk_ids=list(item[2] if len(item) > 2 else []),
+            )
+            for i, item in enumerate(chunks)
         ]
         if len(leaves) == 1:
             return leaves[0], leaves
@@ -604,7 +672,16 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
             layer_start = len(chunks)
 
             async def summarize_node(node: _PsiTreeNode):
-                """Summarize one Psi internal node if its children have text."""
+                """Summarize one Psi internal node if its children have text.
+
+                Also propagates leaf provenance: the node's
+                ``source_chunk_ids`` becomes the order-preserving deduped
+                union of every child's ``source_chunk_ids``. Because
+                children at this layer have already been processed (leaves
+                first, then bottom-up), each child carries the full set
+                of leaf ids underneath it — so the union here is the
+                complete leaf set this summary covers.
+                """
                 texts = [child.text for child in node.children if child.text]
                 if not texts:
                     logging.warning("RAPTOR Psi node %s skipped because it has no child text to summarize", node.index)
@@ -613,7 +690,15 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                 if result is None:
                     logging.warning("RAPTOR Psi node %s skipped because summarization failed", node.index)
                     return None
-                node.text, node.embedding = result
+                _, node.text, node.embedding = result
+                merged_ids: list[str] = []
+                seen: set[str] = set()
+                for child in node.children:
+                    for src in child.source_chunk_ids:
+                        if src and src not in seen:
+                            seen.add(src)
+                            merged_ids.append(src)
+                node.source_chunk_ids = merged_ids
                 return node
 
             tasks = [asyncio.create_task(summarize_node(node)) for node in nodes]
@@ -628,7 +713,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
 
             summarized_nodes = [node for node in summarized_nodes if node is not None]
             for node in summarized_nodes:
-                chunks.append((node.text, node.embedding))
+                chunks.append((node.text, node.embedding, list(node.source_chunk_ids)))
 
             if len(chunks) > layer_start:
                 layers.append((layer_start, len(chunks)))
@@ -646,34 +731,119 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
 
         return chunks, layers
 
-    async def __call__(self, chunks, random_state, callback=None, task_id: str = ""):
-        """Build summary chunks and layer boundaries for RAPTOR retrieval."""
+    async def __call__(
+        self,
+        chunks,
+        random_state,
+        callback=None,
+        task_id: str = "",
+        is_tree: bool = False,
+    ):
+        """Build summary chunks and layer boundaries for RAPTOR retrieval.
+
+        ``chunks`` accepts either the legacy 2-tuple shape
+        ``(text, vec)`` or the provenance-carrying 3-tuple shape
+        ``(text, vec, source_chunk_ids)`` where ``source_chunk_ids`` is
+        the list of original chunk ids that produced this entry. Output
+        entries always carry ``text, vec, source_chunk_ids`` in their
+        first three slots (``[]`` when a leaf's id was missing); entries
+        normalized or summarized here add a title as a fourth slot.
+
+        Return shapes:
+          * ``is_tree=False`` (default) — original behavior: returns
+            ``(chunks, layers)`` where ``chunks`` is the flat list
+            (originals + summaries) and ``layers`` is the per-level
+            index range ``[(start, end), ...]``.
+          * ``is_tree=True`` — returns ``(tree, [])`` with ``tree`` built
+            by ``_materialize_tree``. Supported for the classic builder
+            only; raises ``NotImplementedError`` for PSI_TREE_BUILDER
+            (PSI's hyperedge-driven summarization doesn't form a strict
+            parent-of relation). Returns a bare ``None`` when fewer than
+            two usable chunks are given.
+        """
         if len(chunks) <= 1:
-            return [], []
-        chunks = [(s, a) for s, a in chunks if s and a is not None and len(a) > 0]
-        if len(chunks) <= 1:
-            return chunks, [(0, len(chunks))]
+            return None if is_tree else ([], [])
+
+        # Normalize input to ``(text, vec, source_chunk_ids, title)`` with an
+        # empty title. Rejects empties / bad vectors like the legacy path did.
+        def _normalize(item):
+            if len(item) >= 3:
+                text, vec, src = item[0], item[1], item[2]
+            else:
+                text, vec = item[0], item[1]
+                src = []
+            if not text or vec is None or len(vec) <= 0:
+                return None
+            # Defensive: a leaf should carry a list of strings. Drop
+            # falsy entries so we don't propagate empty ids upward.
+            if isinstance(src, (list, tuple)):
+                src = [s for s in src if s]
+            else:
+                src = [src] if src else []
+            return (text, vec, list(src), "")
+
+        normalized = [t for t in (_normalize(c) for c in chunks) if t is not None]
+        if len(normalized) <= 1:
+            return None if is_tree else (normalized, [(0, len(normalized))])
+        chunks = normalized
+
         if self._tree_builder == PSI_TREE_BUILDER:
+            if is_tree:
+                raise NotImplementedError(
+                    "is_tree=True is not supported for PSI_TREE_BUILDER",
+                )
             logging.info("RAPTOR: using %s tree builder for %d chunks", self._tree_builder, len(chunks))
             return await self._build_psi_layers(chunks, callback, task_id)
 
+        # ``parent_child_map`` records each summary's immediate
+        # children so ``_materialize_tree`` can walk back into a tree
+        # when ``is_tree`` is set. Always populated (cheap) so the
+        # tree path is just a return-shape choice at the end.
+        parent_child_map: dict[int, list[int]] = {}
+        n_originals = len(chunks)
+
         layers = [(0, len(chunks))]
         start, end = 0, len(chunks)
 
         @timeout(60 * 20)
         async def summarize(ck_idx: list[int]):
-            """Summarize one classic RAPTOR cluster into the chunk list."""
+            """Summarize one classic RAPTOR cluster into the chunk list.
+
+            On success appends ``(summary_text, summary_vec, src_ids,
+            title)`` where ``src_ids`` is the order-preserving deduped
+            union of the ``source_chunk_ids`` of every chunk indexed in
+            ``ck_idx`` — i.e. the full leaf set that contributed to
+            the cluster, even through nested summaries.
+            """
             nonlocal chunks
 
             texts = [chunks[i][0] for i in ck_idx]
             result = await self._summarize_texts(texts, callback, task_id)
             if result is not None:
-                chunks.append(result)
+                # Union the children's source ids, de-duplicated with a
+                # ``seen`` set while preserving first-seen order.
+                merged_ids: list[str] = []
+                seen: set[str] = set()
+                for i in ck_idx:
+                    for src in chunks[i][2]:
+                        if src and src not in seen:
+                            seen.add(src)
+                            merged_ids.append(src)
+                summary_ti, summary_text, summary_vec = result
+                chunks.append((summary_text, summary_vec, merged_ids, summary_ti))
+                # Index of the just-appended summary; map it to its
+                # immediate children for the tree materializer below.
+                parent_child_map[len(chunks) - 1] = list(ck_idx)
 
         while end - start > 1:
             self._check_task_canceled(task_id, "layer processing")
 
-            embeddings = [embd for _, embd in chunks[start:end]]
+            # Entries are ``(text, vec, source_ids, title)`` tuples:
+            # originals come from ``_normalize`` (empty title) and
+            # summaries are appended by ``summarize``. The vector is
+            # always at index 1, so positional access is used instead
+            # of unpacking a fixed number of slots.
+            embeddings = [entry[1] for entry in chunks[start:end]]
             if len(embeddings) == 2:
                 await summarize([start, start + 1])
                 produced = len(chunks) - end
@@ -687,43 +857,34 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                 end = len(chunks)
                 continue
 
-            n_neighbors = int((len(embeddings) - 1) ** 0.8)
-            reduced_embeddings = umap.UMAP(
-                n_neighbors=max(2, n_neighbors),
-                n_components=min(12, len(embeddings) - 2),
-                metric="cosine",
-            ).fit_transform(embeddings)
-            if self._clustering_method == AHC_CLUSTERING_METHOD:
-                logging.info("RAPTOR: using clustering_method=%s before _get_clusters_ahc", self._clustering_method)
-                raw_labels = self._get_clusters_ahc(reduced_embeddings, task_id=task_id)
-                raw_cluster_count = np.unique(raw_labels).size
-                logging.info("RAPTOR AHC: _get_clusters_ahc produced n_clusters=%d", raw_cluster_count)
-                if raw_cluster_count > 1:
-                    adjusted = self._adjust_tree_nodes(reduced_embeddings, raw_labels)
-                    adjusted_cluster_count = np.unique(adjusted).size
-                    logging.info("RAPTOR AHC: _adjust_tree_nodes adjusted n_clusters=%d", adjusted_cluster_count)
-                else:
-                    adjusted = raw_labels
-                    logging.warning("RAPTOR AHC: _adjust_tree_nodes skipped because _get_clusters_ahc returned one cluster")
-                unique_labels = np.unique(adjusted)
-                label_map = {old: idx for idx, old in enumerate(unique_labels)}
-                lbls = [label_map[int(lbl)] for lbl in adjusted]
-                n_clusters = len(unique_labels)
-            else:
-                n_clusters = self._get_optimal_clusters(reduced_embeddings, random_state, task_id=task_id)
-                if n_clusters == 1:
-                    lbls = [0 for _ in range(len(reduced_embeddings))]
-                else:
-                    gm = GaussianMixture(n_components=n_clusters, random_state=random_state)
-                    gm.fit(reduced_embeddings)
-                    probs = gm.predict_proba(reduced_embeddings)
-                    lbls = [np.where(prob > self._threshold)[0] for prob in probs]
-                    lbls = [lbl[0] if isinstance(lbl, np.ndarray) else lbl for lbl in lbls]
-
-            if n_clusters == 1:
-                lbls = [0 for _ in range(len(reduced_embeddings))]
-            else:
-                lbls = [int(lbl[0]) if isinstance(lbl, np.ndarray) else int(lbl) for lbl in lbls]
+            n_clusters, lbls = self.clustering(
+                embeddings,
+                random_state=random_state,
+                task_id=task_id,
+            )
+            
+            # Loop-termination guarantee. The outer ``while end - start > 1``
+            # relies on each layer strictly shrinking the input count. If
+            # the clusterer degenerates and returns one cluster per input,
+            # every "cluster" is a single chunk, ``summarize()`` produces
+            # one summary per input, and ``produced == end - start`` —
+            # the same count carries into the next iteration and the loop
+            # spins forever, logging "Cluster one layer: N -> N".
+            #
+            # Collapse everything at this level into a single cluster so
+            # the layer produces exactly one summary. The tree gets a
+            # taller-than-usual "single trunk" segment at this depth
+            # instead of an infinite loop; downstream consumers only care
+            # that ``layers`` is monotonically shrinking.
+            if n_clusters >= len(embeddings):
+                logging.warning(
+                    "RAPTOR clustering did not reduce input count "
+                    "(%d inputs → %d clusters); collapsing this layer "
+                    "into a single summary to prevent a non-terminating loop",
+                    len(embeddings), n_clusters,
+                )
+                n_clusters = 1
+                lbls = [0] * len(embeddings)
 
             tasks = []
             for c in range(n_clusters):
@@ -758,4 +919,52 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
             start = end
             end = len(chunks)
 
+        if is_tree:
+            return self._materialize_tree(chunks, layers, parent_child_map, n_originals), []
         return chunks, layers
+
+    @staticmethod
+    def _materialize_tree(chunks, layers, parent_child_map, n_originals):
+        """Build the ``is_tree=True`` tree dict from the flat RAPTOR output.
+
+        Starting at every entry of the last layer, follows
+        ``parent_child_map`` (summary index -> immediate child indexes)
+        downward. ``chunks`` entries hold text at [0], source chunk ids at
+        [2] and, for 4-slot entries, a title at [3]; indexes below
+        ``n_originals`` are layer-0 originals.
+
+        Returns ``None`` when there is nothing to materialize, otherwise a
+        dict with ``title`` and ``description`` plus ``source_chunk_ids``
+        (node covering only originals) or ``children`` (any other node).
+        """
+        if not layers or len(chunks) == 0:
+            return None
+        top_start, top_end = layers[-1]
+        if top_end <= top_start:
+            return None
+
+        def _title_at(idx: int) -> str:
+            # Entries with fewer than four slots carry no title.
+            return chunks[idx][3] if len(chunks[idx]) >= 4 else ""
+
+        def _desc_at(idx: int) -> str:
+            return chunks[idx][0] if chunks[idx] else ""
+
+        def _build_node(idx: int) -> dict:
+            children_idx = parent_child_map.get(idx, [])
+            # An original itself, or a summary whose immediate children are
+            # all originals, is a leaf: expose the covered chunk ids
+            # (order-preserving, de-duplicated) instead of ``children``.
+            members = [idx] if idx < n_originals else children_idx
+            if members and all(c < n_originals for c in members):
+                ids = list(dict.fromkeys(s for c in members for s in chunks[c][2] if s))
+                return {"title": _title_at(idx), "source_chunk_ids": ids, "description": _desc_at(idx)}
+            return {"children": [_build_node(c) for c in children_idx], "title": _title_at(idx), "description": _desc_at(idx)}
+
+        top_nodes = [_build_node(i) for i in range(top_start, top_end)]
+        if len(top_nodes) == 1:
+            return top_nodes[0]
+        # Multiple top-layer entries — clustering didn't collapse to a single
+        # root. Wrap them in a synthetic root (same keys as any inner node)
+        # so the caller always sees one dict.
+        return {"title": "(root)", "children": top_nodes, "description": ""}
diff --git a/rag/advanced_rag/knowlege_compile/structure.py b/rag/advanced_rag/knowlege_compile/structure.py
new file mode 100644
index 0000000000..e60f82e178
--- /dev/null
+++ b/rag/advanced_rag/knowlege_compile/structure.py
@@ -0,0 +1,1637 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import datetime
+import json
+import logging
+from typing import Callable, Tuple
+
+import xxhash
+
+from common.exceptions import TaskCanceledException
+from common.misc_utils import thread_pool_exec
+from common.token_utils import num_tokens_from_string
+from rag.prompts.generator import gen_json
+
+from ._common import (
+    build_chunk_batches as _build_chunk_batches,
+    encode as _encode,
+    find_vec_field as _find_vec_field,
+    stable_row_id as _stable_row_id,
+    tokenize_for_search as _tokenize_for_search,
+    union_ordered as _union_ordered,
+    run_chunked_pipeline as _run_chunked_pipeline,
+)
+
+
+_STRUCT_TYPES = ("list", "set", "hypergraph")
+
+
+def _struct_normalize_kind(kind) -> str:
+    if not isinstance(kind, str):
+        return ""
+    normalized = kind.strip().lower().replace("-", "_")
+    if normalized in {"pageindex", "page_index", "knowledge_graph"}:
+        return "timeline"
+    return normalized
+
+
+def _struct_localize(value, language: str = "en") -> str:
+    """Render multilingual values to a single string (mirrors loader._localize_data)."""
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value
+    if isinstance(value, list):
+        return "\n".join(f"{i + 1}. {item}" for i, item in enumerate(value))
+    if isinstance(value, dict):
+        v = value.get(language)
+        if v is None and language != "en":
+            v = value.get("en")
+        if isinstance(v, str):
+            return v
+        if isinstance(v, list):
+            return "\n".join(f"{i + 1}. {item}" for i, item in enumerate(v))
+    return ""
+
+
+def _struct_get(cfg: dict, *keys, default=None):
+    """Case-insensitive lookup against the first matching key."""
+    if not isinstance(cfg, dict):
+        return default
+    for k in keys:
+        if k in cfg:
+            return cfg[k]
+        kl = k.lower()
+        for ck in cfg.keys():
+            if isinstance(ck, str) and ck.lower() == kl:
+                return cfg[ck]
+    return default
+
+
+def _struct_infer_type(parser_config: dict) -> str:
+    explicit = _struct_get(parser_config, "compile_type")
+    normalized_explicit = _struct_normalize_kind(explicit)
+    if normalized_explicit in _STRUCT_TYPES:
+        return normalized_explicit
+    kind = _struct_get(parser_config, "kind")
+    normalized_kind = _struct_normalize_kind(kind)
+    if normalized_kind:
+        return normalized_kind
+    output = _struct_get(parser_config, "output", default={}) or {}
+    if _struct_get(output, "entities") and _struct_get(output, "relations"):
+        return "hypergraph"
+    return "list"
+
+
+def _struct_supported_type(parser_config: dict, autotype: str) -> bool:
+    if autotype in _STRUCT_TYPES:
+        return True
+    kind = _struct_get(parser_config, "kind")
+    return _struct_normalize_kind(kind) == autotype
+
+
+def _struct_render_fields(fields: list, language: str) -> Tuple[str, str]:
+    """Return (bulleted field descriptions, JSON skeleton for one item)."""
+    lines = []
+    skeleton_parts = []
+    for f in fields or []:
+        name = f.get("name", "")
+        ftype = f.get("type", "str")
+        desc = _struct_localize(f.get("description", ""), language)
+        required = f.get("required")
+        req_label = "optional" if required is False else "required"
+        lines.append(f"- {name} ({ftype}, {req_label}): {desc}")
+        if ftype == "list":
+            placeholder = "[<string>, ...]"
+        elif ftype == "int":
+            placeholder = "<int>"
+        elif ftype == "float":
+            placeholder = "<float>"
+        elif ftype == "bool":
+            placeholder = "<true|false>"
+        else:
+            placeholder = "<string>"
+        skeleton_parts.append(f'"{name}": {placeholder}')
+    return "\n".join(lines), "{ " + ", ".join(skeleton_parts) + " }"
+
+
+def _struct_render_type_fields(fields: list, language: str, *, kind: str) -> Tuple[str, str]:
+    """Render the new compilation-template field shape.
+
+    New templates define allowed item ``type`` values with descriptions/rules,
+    rather than arbitrary output field names. The extraction output keeps a
+    stable shape so downstream merge logic can compare concrete items instead
+    of collapsing every item into the template type.
+    """
+    lines: list[str] = []
+    type_values: list[str] = []
+    for f in fields or []:
+        if not isinstance(f, dict):
+            continue
+        typ = f.get("type")
+        typ = typ.strip() if isinstance(typ, str) else ""
+        if not typ:
+            continue
+        type_values.append(typ)
+        lines.append(f"- type: {typ}")
+        desc = _struct_localize(f.get("description"), language)
+        rule = _struct_localize(f.get("rule"), language)
+        if desc:
+            lines.append(f"  description: {desc}")
+        if rule:
+            lines.append(f"  rule: {rule}")
+
+    if not type_values:
+        type_values.append("other")
+        lines.append("- type: other")
+
+    if kind == "relation":
+        skeleton = '{ "type": "<one of: ' + "|".join(type_values) + '>", "source": "<known entity name>", "target": "<known entity name>", "description": "<evidence or relation description>" }'
+    else:
+        skeleton = '{ "type": "<one of: ' + "|".join(type_values) + '>", "name": "<exact extracted item text>", "description": "<evidence, definition, or detail from the source>" }'
+    return "\n".join(lines), skeleton
+
+
+def _struct_hypergraph_prompts(parser_config: dict, language: str = "en") -> Tuple[str, str]:
+    autotype = _struct_infer_type(parser_config)
+    guideline = _struct_get(parser_config, "guideline", default={}) or {}
+    output = _struct_get(parser_config, "output", default={}) or {}
+    options = _struct_get(parser_config, "options", default={}) or {}
+    uses_template_shape = bool(_struct_get(parser_config, "entity") or _struct_get(parser_config, "relation"))
+
+    target = _struct_localize(_struct_get(guideline, "target"), language)
+    rules_e = _struct_localize(_struct_get(guideline, "rules_for_entities"), language)
+    rules_r = _struct_localize(_struct_get(guideline, "rules_for_relations"), language)
+    rules_t = _struct_localize(_struct_get(guideline, "rules_for_time"), language)
+    global_rules = _struct_localize(_struct_get(parser_config, "global_rules"), language)
+
+    observation_time = _struct_get(options, "observation_time") or datetime.date.today().isoformat()
+    if rules_t and "{observation_time}" in rules_t:
+        rules_t = rules_t.replace("{observation_time}", observation_time)
+
+    entities_cfg = _struct_get(parser_config, "entity", default={}) or {} if uses_template_shape else _struct_get(output, "entities", default={}) or {}
+    relations_cfg = _struct_get(parser_config, "relation", default={}) or {} if uses_template_shape else _struct_get(output, "relations", default={}) or {}
+    ent_desc = _struct_localize(_struct_get(entities_cfg, "description"), language)
+    rel_desc = _struct_localize(_struct_get(relations_cfg, "description"), language)
+    ent_fields = _struct_get(entities_cfg, "fields", default=[]) or []
+    rel_fields = _struct_get(relations_cfg, "fields", default=[]) or []
+    if uses_template_shape:
+        ent_fields_text, ent_skel = _struct_render_type_fields(ent_fields, language, kind="entity")
+        rel_fields_text, rel_skel = _struct_render_type_fields(rel_fields, language, kind="relation")
+    else:
+        ent_fields_text, ent_skel = _struct_render_fields(ent_fields, language)
+        rel_fields_text, rel_skel = _struct_render_fields(rel_fields, language)
+
+    node_parts = [f"# Role and Task:\n{target}"] if target else []
+    if global_rules:
+        node_parts.append(f"## Global Rules:\n{global_rules}")
+    if rules_e:
+        node_parts.append(f"## Entity Extraction Rules:\n{rules_e}")
+    if ent_desc:
+        node_parts.append(f"## Entity Description:\n{ent_desc}")
+    node_parts.append(f"## Entity Fields:\n{ent_fields_text}")
+    node_parts.append(
+        "## Response Format:\n"
+        "Reply with a single JSON object of the form: "
+        f'{{"items": [{ent_skel}, ...]}}.\n'
+        f'Auto-type: "{_struct_infer_type(parser_config)}". ' + ("Items must be unique. " if autotype == "set" else "") + "Return JSON only, no commentary."
+    )
+    node_prompt = "\n\n".join(node_parts)
+
+    if not relations_cfg:
+        return node_prompt, ""
+
+    edge_parts = [f"# Role and Task:\n{target}"] if target else []
+    if global_rules:
+        edge_parts.append(f"## Global Rules:\n{global_rules}")
+    if rules_r:
+        edge_parts.append(f"## Relation Extraction Rules:\n{rules_r}")
+    if rules_t:
+        edge_parts.append(f"## Time Rules:\n{rules_t}")
+    if rel_desc:
+        edge_parts.append(f"## Relation Description:\n{rel_desc}")
+    edge_parts.append(f"## Relation Fields:\n{rel_fields_text}")
+    edge_parts.append("## Known Entities:\n{known_nodes}")
+    edge_parts.append(
+        "## Response Format:\n"
+        "Reply with a single JSON object of the form: "
+        f'{{"items": [{rel_skel}, ...]}}.\n'
+        "Only create relations between entities listed in 'Known Entities'. "
+        "Return JSON only, no commentary."
+    )
+    edge_prompt = "\n\n".join(edge_parts)
+
+    return node_prompt, edge_prompt
+
+
+def _struct_entity_id_field(parser_config: dict) -> str:
+    if _struct_get(parser_config, "entity"):
+        return "name"
+    identifiers = _struct_get(parser_config, "identifiers", default={}) or {}
+    entity_id = _struct_get(identifiers, "entity_id")
+    if isinstance(entity_id, str) and "{" not in entity_id and entity_id.strip():
+        return entity_id.strip()
+    entities_cfg = _struct_get(_struct_get(parser_config, "output", default={}) or {}, "entities", default={}) or {}
+    for f in _struct_get(entities_cfg, "fields", default=[]) or []:
+        if f.get("required") is not False:
+            return f.get("name", "name")
+    return "name"
+
+
+def _struct_unwrap_items(res) -> list:
+    if res is None:
+        return []
+    if isinstance(res, dict):
+        items = res.get("items")
+        if isinstance(items, list):
+            return [it for it in items if isinstance(it, dict)]
+        return []
+    if isinstance(res, list):
+        return [it for it in res if isinstance(it, dict)]
+    return []
+
+
+async def _struct_extract_hypergraph(text: str, parser_config: dict, chat_mdl, language: str) -> Tuple[list[dict], list[dict]]:
+    node_prompt, edge_prompt_template = _struct_hypergraph_prompts(parser_config, language)
+
+    user_prompt = f"## Source Text:\n{text}\n\n## Output (JSON only):"
+    node_res = await gen_json(node_prompt, user_prompt, chat_mdl, gen_conf={"temperature": 0.1})
+    nodes = _struct_unwrap_items(node_res)
+
+    id_field = _struct_entity_id_field(parser_config)
+    known_keys = []
+    for n in nodes:
+        v = n.get(id_field)
+        if v is None:
+            continue
+        v_str = str(v).strip()
+        if v_str and v_str not in known_keys:
+            known_keys.append(v_str)
+    known_str = "- " + "\n- ".join(known_keys) if known_keys else "(none)"
+
+    if not edge_prompt_template:
+        return nodes, []
+
+    edge_prompt = edge_prompt_template.replace("{known_nodes}", known_str)
+    edge_res = await gen_json(edge_prompt, user_prompt, chat_mdl, gen_conf={"temperature": 0.1})
+    edges = _struct_unwrap_items(edge_res)
+
+    return nodes, edges
+
+
+# Backwards-compat alias for the shared helper. New code should use
+# ``_common.encode`` directly; kept here so existing references inside this
+# module keep working without a wider rename.
+_struct_embed = _encode
+
+
+def _struct_payload_description(payload: dict) -> str:
+    """Concat string values of every non-description field (lists flattened)."""
+    parts: list[str] = []
+    for k, v in payload.items():
+        if isinstance(v, (list, tuple)):
+            for item in v:
+                if item is None:
+                    continue
+                s = str(item).strip()
+                if s:
+                    parts.append(s)
+        else:
+            s = str(v).strip()
+            if s:
+                parts.append(s)
+    return " ".join(parts)
+
+
+def _struct_load_payload(doc: dict) -> dict:
+    try:
+        payload = json.loads(doc.get("content_with_weight") or "{}")
+    except Exception:
+        return {}
+    return payload if isinstance(payload, dict) else {}
+
+
+def _struct_graph_entity(payload: dict, source_chunk_ids: list | None = None) -> dict | None:
+    name = payload.get("name") or payload.get("text") or payload.get("term") or payload.get("title")
+    name = str(name).strip() if name is not None else ""
+    if not name:
+        return None
+    typ = payload.get("type") or "other"
+    typ = str(typ).strip() if typ is not None else "other"
+    aliases = payload.get("aliases")
+    if isinstance(aliases, str):
+        aliases = [aliases]
+    if not isinstance(aliases, list):
+        aliases = []
+    aliases = [str(a).strip() for a in aliases if str(a).strip()]
+    description = payload.get("description") or payload.get("discription") or payload.get("definition_excerpt") or ""
+    if isinstance(source_chunk_ids, str):
+        source_chunk_ids = [source_chunk_ids]
+    source_chunk_ids = _struct_union_chunk_ids(source_chunk_ids)
+    return {
+        "aliases": aliases,
+        "mention_count": 1,
+        "name": name,
+        "source_chunk_ids": source_chunk_ids,
+        "type": typ or "other",
+        "discription": str(description).strip() if description is not None else "",
+    }
+
+
+def _struct_graph_relation(payload: dict) -> dict | None:
+    src = payload.get("source") or payload.get("src") or payload.get("from")
+    tgt = payload.get("target") or payload.get("tgt") or payload.get("to")
+    src = str(src).strip() if src is not None else ""
+    tgt = str(tgt).strip() if tgt is not None else ""
+    if not src or not tgt:
+        return None
+    typ = payload.get("type") or "related"
+    return {
+        "from": src,
+        "to": tgt,
+        "type": str(typ).strip() if typ is not None else "related",
+    }
+
+
+def _struct_merge_graph_entities(entities: list[dict]) -> list[dict]:
+    merged: dict[tuple[str, str], dict] = {}
+    order: list[tuple[str, str]] = []
+    for entity in entities:
+        key = (entity["name"], entity.get("type") or "other")
+        if key not in merged:
+            merged[key] = entity
+            order.append(key)
+            continue
+        target = merged[key]
+        target["mention_count"] = int(target.get("mention_count") or 0) + int(entity.get("mention_count") or 1)
+        aliases = target.setdefault("aliases", [])
+        for alias in entity.get("aliases") or []:
+            if alias not in aliases:
+                aliases.append(alias)
+        if not target.get("discription") and entity.get("discription"):
+            target["discription"] = entity["discription"]
+        target["source_chunk_ids"] = _struct_union_chunk_ids(
+            target.get("source_chunk_ids"),
+            entity.get("source_chunk_ids"),
+        )
+    return [merged[key] for key in order]
+
+
+def _struct_relation_member_fields(parser_config: dict) -> Tuple:
+    """Return (source_field, target_field) for relation docs, or (None, None).
+
+    Looks at ``identifiers.relation_members`` first (dict form for graph-style
+    configs, e.g. ``{source: source, target: target}``); falls back to the
+    conventional ``source`` / ``target`` field names if both appear in the
+    relation schema.
+    """
+    identifiers = _struct_get(parser_config, "identifiers", default={}) or {}
+    members = _struct_get(identifiers, "relation_members")
+    if isinstance(members, dict):
+        src = members.get("source") or members.get("src")
+        tgt = members.get("target") or members.get("tgt")
+        if src or tgt:
+            return src, tgt
+
+    if _struct_get(parser_config, "relation"):
+        return "source", "target"
+
+    relations_cfg = (
+        _struct_get(
+            _struct_get(parser_config, "output", default={}) or {},
+            "relations",
+            default={},
+        )
+        or {}
+    )
+    field_names = {f.get("name") for f in (_struct_get(relations_cfg, "fields", default=[]) or []) if isinstance(f, dict)}
+    if "source" in field_names and "target" in field_names:
+        return "source", "target"
+    return None, None
+
+
+def _struct_to_es_doc(
+    payload: dict,
+    compile_kwd: str,
+    doc_id: str,
+    chunk_ids: list[str],
+    vec,
+    kind: str,
+    src_field: str | None = None,
+    target_field: str | None = None,
+    compilation_template_id: str | None = None,
+    compilation_template_kind: str | None = None,
+) -> dict:
+    """Build one ES doc for an extracted entity or relation.
+
+    Args:
+        kind: ``"entity"`` or ``"relation"`` — written to ``knowledge_graph_kwd``.
+        src_field / target_field: when ``kind == "relation"`` and these field
+            names exist on the payload, the resolved values are written to
+            ``from_entity_kwd`` / ``to_entity_kwd``.
+        compilation_template_id / compilation_template_kind: stamped onto
+            every row so the document-structure endpoint can group by
+            template id and the UI can render one tab per template. The
+            id is stored as a single-element list under
+            ``compilation_template_ids`` because the same logical entity
+            *could* later be claimed by multiple templates during a
+            cross-template merge (rare, but the schema is forward-compat).
+    """
+    content_with_weight = json.dumps(payload, ensure_ascii=False)
+    if hasattr(vec, "tolist"):
+        vec_list = vec.tolist()
+    else:
+        vec_list = list(vec)
+    doc_id_str = str(doc_id)
+    template_id_str = str(compilation_template_id).strip() if compilation_template_id else ""
+
+    description = _struct_payload_description(payload)
+    content_ltks, content_sm_ltks = _tokenize_for_search(description)
+
+    # Mix the template id into the stable row id so two templates with the
+    # same compile_kwd don't collide on identical payloads (e.g. two
+    # different list-kind templates that each extract "headline X").
+    row_seed_extras = [template_id_str] if template_id_str else []
+    row_id = _stable_row_id(content_with_weight, doc_id_str, *row_seed_extras)
+
+    doc = {
+        "content_with_weight": content_with_weight,
+        "compile_kwd": compile_kwd,
+        "knowledge_graph_kwd": kind,
+        "doc_id": doc_id_str,
+        "source_chunk_ids": list(chunk_ids or []),
+        "content_ltks": content_ltks,
+        "content_sm_ltks": content_sm_ltks,
+        f"q_{len(vec_list)}_vec": vec_list,
+        "id": row_id,
+    }
+    if template_id_str:
+        doc["compilation_template_ids"] = [template_id_str]
+    if compilation_template_kind:
+        doc["compilation_template_kind_kwd"] = str(compilation_template_kind)
+
+    if kind == "relation":
+        if src_field:
+            src_val = payload.get(src_field)
+            if src_val is not None and str(src_val).strip():
+                doc["from_entity_kwd"] = str(src_val).strip()
+        if target_field:
+            tgt_val = payload.get(target_field)
+            if tgt_val is not None and str(tgt_val).strip():
+                doc["to_entity_kwd"] = str(tgt_val).strip()
+
+    return doc
+
+
+async def _struct_process_batch(
+    packed: list[dict],
+    batch_idx: int,
+    total: int,
+    autotype: str,
+    parser_config: dict,
+    chat_mdl,
+    embd_mdl,
+    doc_id: str,
+    language: str,
+    callback,
+    semaphore,
+    compilation_template_id: str | None = None,
+    compilation_template_kind: str | None = None,
+) -> list[dict]:
+    """Process one packed batch end-to-end (extract → embed → ES docs).
+
+    ``packed`` is the per-batch shape produced by
+    ``_common.build_chunk_batches``: ``[{label, chunk_id, text}, ...]``.
+    The ``label`` field is unused here — structure uses ``---`` separators
+    instead of per-chunk labels — but ``chunk_id`` is collected so every
+    item produced by this batch carries the batch's source chunk ids.
+
+    The semaphore (if any) is taken around the entire batch's LLM +
+    embedding work to bound peak concurrency.
+
+    Args:
+        packed: entries of this batch; an empty batch returns ``[]``.
+        batch_idx: zero-based index of this batch, used in logs and progress.
+        total: total number of batches, used as the progress denominator.
+        autotype: structure type written to each row's ``compile_kwd``.
+        parser_config: knowledge-compilation config dict.
+        chat_mdl: chat model handed to the extraction step.
+        embd_mdl: embedding model handed to ``_struct_embed``.
+        doc_id: source document id stamped on every row.
+        language: language code for resolving multilingual config strings.
+        callback: optional progress callback, called as
+            ``callback(fraction, message)``.
+        semaphore: optional async context manager guarding the whole batch.
+        compilation_template_id / compilation_template_kind: forwarded to
+            ``_struct_to_es_doc`` unchanged.
+
+    Returns:
+        ES-ready rows for the batch. Returns ``[]`` (after logging) when
+        extraction fails, embedding fails, or the embedding count does not
+        match the number of payloads; in those cases ``callback`` is not
+        invoked.
+    """
+    if not packed:
+        return []
+
+    # Entries without a truthy chunk_id / with a non-string text are dropped.
+    batch_ids: list = [e["chunk_id"] for e in packed if e.get("chunk_id")]
+    batch_segments: list[str] = [e["text"] for e in packed if isinstance(e.get("text"), str)]
+    combined_text = "\n\n---\n\n".join(batch_segments)
+
+    src_field, target_field = _struct_relation_member_fields(parser_config)
+
+    async def _run() -> list[dict]:
+        # For hypergraph, entity extraction MUST complete before edge extraction
+        # within the same batch, because the edge prompt's {known_nodes}
+        # placeholder is filled from this batch's extracted nodes — see
+        # _struct_extract_hypergraph. Parallelism across batches is fine; the
+        # two stages within one batch are strictly sequential.
+        # NOTE(review): ``except Exception`` below also catches any
+        # Exception-derived cancellation signal raised during extraction —
+        # confirm that swallowing it (and returning []) is intended.
+        try:
+            items, relations = await _struct_extract_hypergraph(combined_text, parser_config, chat_mdl, language)
+        except Exception as e:
+            logging.exception(f"compile_structure_from_text: extraction failed for batch {batch_idx}: {e}")
+            return []
+
+        # Entities first, then relations; ``kinds`` is kept index-aligned
+        # with ``payloads`` so each row gets the right knowledge_graph_kwd.
+        payloads = items + relations
+        kinds = ["entity"] * len(items) + ["relation"] * len(relations)
+        if not payloads:
+            if callback:
+                callback((batch_idx + 1) / total, f"{batch_idx + 1}/{total} batches: 0 items")
+            return []
+
+        # One embedding input per payload, in the same order.
+        embed_inputs = [_struct_payload_description(p) for p in payloads]
+        try:
+            embeddings = await _struct_embed(embd_mdl, embed_inputs)
+        except Exception as e:
+            logging.exception(f"compile_structure_from_text: embedding failed for batch {batch_idx}: {e}")
+            return []
+
+        # zip() below would silently truncate on a mismatch, so bail out.
+        if len(embeddings) != len(payloads):
+            logging.error(f"compile_structure_from_text: embedding count mismatch ({len(embeddings)} vs {len(payloads)}) for batch {batch_idx}")
+            return []
+
+        docs = [
+            _struct_to_es_doc(
+                payload,
+                autotype,
+                doc_id,
+                batch_ids,
+                vec,
+                kind,
+                src_field=src_field,
+                target_field=target_field,
+                compilation_template_id=compilation_template_id,
+                compilation_template_kind=compilation_template_kind,
+            )
+            for payload, vec, kind in zip(payloads, embeddings, kinds)
+        ]
+
+        if callback:
+            callback((batch_idx + 1) / total, f"{batch_idx + 1}/{total} batches: {len(payloads)} items")
+
+        return docs
+
+    # Hold the semaphore (when given) for the whole extract + embed sequence.
+    if semaphore is not None:
+        async with semaphore:
+            return await _run()
+    return await _run()
+
+
+async def compile_structure_from_text(
+    chunks: list[dict],
+    parser_config,
+    chat_mdl,
+    embd_mdl,
+    doc_id: str,
+    language: str = "en",
+    callback=None,
+    max_workers: int = 10,
+    compilation_template_id: str | None = None,
+) -> list[dict]:
+    """Extract list/set/hypergraph structures from text chunks and prepare ES docs.
+
+    Each chunk is processed independently — cross-chunk merging of entities and
+    relations is deferred to a separate pipeline stage and is intentionally not
+    performed here.
+
+    Args:
+        chunks: list of dicts; each must expose ``id`` and ``text`` (a
+            ``content_with_weight`` fallback is also accepted).
+        parser_config: dict already parsed from ``document.parser_config["knowledge_compilation"]`` or
+            the raw JSON string from the database.
+        chat_mdl: LLMBundle for chat (used via ``gen_json``).
+        embd_mdl: LLMBundle for embeddings (used via ``encode``).
+        doc_id: source document id, embedded into every ES doc.
+        language: language code for resolving multilingual config strings.
+        callback: optional progress callback ``(prog: float, msg: str)``.
+
+    Returns:
+        List of ES-ready dicts shaped as::
+
+            {
+                "content_with_weight": <json>,
+                "compile_kwd": "list" | "set" | "hypergraph",
+                "doc_id": <doc_id>,
+                "source_chunk_ids": [<chunk_id>, ...],
+                "q_<dim>_vec": [...],
+                "id": <xxhash>,
+            }
+    """
+    if isinstance(parser_config, str):
+        try:
+            parser_config = json.loads(parser_config)
+        except Exception as e:
+            logging.exception(f"compile_structure_from_text: invalid parser_config JSON: {e}")
+            return []
+    if not isinstance(parser_config, dict):
+        logging.error("compile_structure_from_text: parser_config must be a dict or JSON string")
+        return []
+
+    autotype = _struct_infer_type(parser_config)
+    if not _struct_supported_type(parser_config, autotype):
+        logging.error(f"compile_structure_from_text: unsupported type '{autotype}'")
+        return []
+
+    node_prompt, edge_prompt = _struct_hypergraph_prompts(parser_config, language)
+    prompt_overhead = max(num_tokens_from_string(node_prompt), num_tokens_from_string(edge_prompt))
+
+    # ``kind`` for the row stamp follows the template's ``kind`` field if
+    # present (e.g. "timeline", "page_index"); we fall back to the
+    # inferred autotype ("list" / "set" / "hypergraph") so legacy
+    # configs without a kind still get a sensible label on the UI tab.
+    template_kind = parser_config.get("kind") if isinstance(parser_config, dict) else None
+    if not isinstance(template_kind, str) or not template_kind.strip():
+        template_kind = autotype
+
+    packed_batches, _info = _build_chunk_batches(
+        chunks,
+        chat_mdl,
+        prompt_overhead_tokens=prompt_overhead,
+    )
+    if not packed_batches:
+        return []
+
+    async def _process_one(batch: list[dict], bi: int, total: int) -> list[dict]:
+        # The engine's semaphore already bounds concurrency.
+        return await _struct_process_batch(
+            packed=batch,
+            batch_idx=bi,
+            total=total,
+            autotype=autotype,
+            parser_config=parser_config,
+            chat_mdl=chat_mdl,
+            embd_mdl=embd_mdl,
+            doc_id=doc_id,
+            language=language,
+            callback=callback,
+            semaphore=None,
+            compilation_template_id=compilation_template_id,
+            compilation_template_kind=template_kind,
+        )
+
+    def _flatten(per_batch: list) -> list[dict]:
+        out: list[dict] = []
+        for br in per_batch or []:
+            if br:
+                out.extend(br)
+        return out
+
+    return await _run_chunked_pipeline(
+        packed_batches,
+        process_batch=_process_one,
+        aggregate=_flatten,
+        max_workers=max_workers,
+        callback=callback,
+        log_prefix="compile_structure",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Structured-knowledge merging: local dedup + ES dedup
+# ---------------------------------------------------------------------------
+#
+# Pipeline (per spec):
+#   Phase 1 — Local dedup inside `docs`:
+#     - Group by (doc_id, compile_kwd, from_entity_kwd?, to_entity_kwd?,
+#       compilation template id?) — the bucket key built by _struct_filter_key.
+#     - Within each group, compute pairwise cosine similarity (sklearn) over
+#       q_<dim>_vec, and for each pair above ``similarity_threshold`` (0.9 by
+#       default) ask the LLM via _struct_merge_pair to decide if they're the
+#       same logical item; if yes, collapse in memory (union chunk_ids,
+#       regenerate vector + tokens off the merged payload).
+#   Phase 2 — ES dedup of the surviving docs:
+#     - For each, KNN-search ES with the same filter via MatchDenseExpr; if a
+#       top-1 hit comes back above ``similarity_threshold`` and the LLM judges
+#       it a duplicate, REPLACE the existing ES doc by its old ``id``
+#       (preserving src/target on relations and unioning chunk_ids). Else
+#       insert the new doc.
+#
+# Merge is driven by the user-supplied prompts; a small decision instruction
+# is appended so we can branch on the LLM's verdict via gen_json.
+
+MERGE_SYSTEM_PROMPT = """You are an intelligent data merging assistant.
+You will merge two JSON objects representing the same entity: Item A (existing) and Item B (incoming).
+
+Merge strategy:
+1. Combine information from both items.
+2. If fields conflict, use your best judgment to pick the more detailed or recent-looking value.
+3. If one item has a null/missing value and the other has data, keep the data.
+4. For list fields, combine unique elements from both.
+5. Do not invent new information not present in the inputs.
+6. Return the result in the exact JSON format of the input items."""
+
+MERGE_USER_PROMPT = """Item A (existing):\n{item_existing}\n\nItem B (incoming):\n{item_incoming}"""
+
+MERGE_DECISION_INSTRUCTION = """First decide whether Item A and Item B refer to the same logical entity (for entities) or the same logical relation (for relations). Use the merge strategy above only if they are the same.
+
+Return ONLY a JSON object with this exact structure (no markdown fences, no commentary):
+{
+  "duplicated": <true | false>,
+  "merged": <merged JSON object using the same keys as the inputs when duplicated=true; otherwise null>
+}"""
+
+
+def _struct_doc_template_id(doc: dict) -> str | None:
+    """Pull the (single) compilation_template_id out of an ES row.
+
+    Stored as a list to leave room for future cross-template merges; this
+    helper just returns the first non-empty entry, or None.
+    """
+    raw = doc.get("compilation_template_ids")
+    if isinstance(raw, list):
+        for v in raw:
+            if isinstance(v, str) and v.strip():
+                return v.strip()
+    if isinstance(raw, str) and raw.strip():
+        return raw.strip()
+    return None
+
+
+def _struct_filter_key(doc: dict) -> tuple:
+    """Bucket key for dedup candidates. Includes the template id so two
+    templates that emit a relation with the same (from, to) endpoints
+    don't merge across template boundaries."""
+    return (
+        doc.get("doc_id"),
+        doc.get("compile_kwd"),
+        doc.get("from_entity_kwd"),
+        doc.get("to_entity_kwd"),
+        _struct_doc_template_id(doc),
+    )
+
+
+# Backwards-compat aliases for the shared helpers. New code should call
+# the ``_common`` versions directly.
+_struct_doc_vec = _find_vec_field
+
+
+def _struct_union_chunk_ids(*chunk_id_lists) -> list:
+    """Order-preserving union (compat shim — prefer ``_common.union_ordered``)."""
+    normalized = [[chunk_ids] if isinstance(chunk_ids, str) else chunk_ids for chunk_ids in chunk_id_lists]
+    return _union_ordered(*normalized)
+
+
+async def _struct_merge_pair(existing: dict, incoming: dict, chat_mdl) -> dict | None:
+    """LLM-judged merge. Returns merged payload dict if duplicate, else None.
+
+    Works on the parsed ``content_with_weight`` payloads; the caller re-embeds and
+    rebuilds the doc. Bad payloads, LLM errors and malformed replies yield None.
+    """
+    try:
+        existing_payload = json.loads(existing.get("content_with_weight") or "{}")
+        incoming_payload = json.loads(incoming.get("content_with_weight") or "{}")
+    except Exception:
+        logging.exception("merge: failed to parse content_with_weight")
+        return None
+    if not isinstance(existing_payload, dict) or not isinstance(incoming_payload, dict):
+        return None
+
+    user_prompt = MERGE_USER_PROMPT.format(
+        item_existing=json.dumps(existing_payload, ensure_ascii=False),
+        item_incoming=json.dumps(incoming_payload, ensure_ascii=False),
+    )
+    system_prompt = MERGE_SYSTEM_PROMPT + "\n\n" + MERGE_DECISION_INSTRUCTION
+    try:
+        res = await gen_json(system_prompt, user_prompt, chat_mdl, gen_conf={"temperature": 0.0})
+    except Exception:
+        logging.exception("merge: LLM merge judgement failed; treating pair as distinct")
+        return None
+    if not isinstance(res, dict) or not res.get("duplicated"):
+        return None
+    merged = res.get("merged")
+    return merged if isinstance(merged, dict) else None
+
+
+def _struct_apply_merge_invariants(existing: dict, merged_payload: dict) -> dict:
+    """Pin a merged relation's endpoint fields to the existing payload's values.
+
+    Non-relation docs and unparsable / non-dict existing payloads pass through.
+    """
+    original = None
+    if existing.get("knowledge_graph_kwd") == "relation":
+        try:
+            original = json.loads(existing.get("content_with_weight") or "{}")
+        except Exception:
+            original = None
+    if not isinstance(original, dict):
+        return merged_payload
+    # Endpoint aliases, source side first and then target side; each alias the
+    # existing payload carries overwrites the merged value (in-place mutation).
+    for endpoint_key in ("source", "src", "from", "target", "tgt", "to"):
+        if endpoint_key in original:
+            merged_payload[endpoint_key] = original[endpoint_key]
+    return merged_payload
+
+
+def _struct_rebuild_es_doc(
+    payload: dict,
+    base_doc: dict,
+    vec,
+    chunk_ids: list,
+    preserve_id: bool = True,
+) -> dict:
+    """Rebuild an ES doc from a merged payload using _struct_to_es_doc, then overlay
+    identity fields (id, from_entity_kwd, to_entity_kwd, compilation_template_ids) from base_doc.
+    """
+    kind = base_doc.get("knowledge_graph_kwd") or "entity"
+    src_field = None
+    target_field = None
+    if kind == "relation":
+        try:
+            existing_payload = json.loads(base_doc.get("content_with_weight") or "{}")
+            if isinstance(existing_payload, dict):
+                if "source" in existing_payload and "target" in existing_payload:
+                    src_field, target_field = "source", "target"
+        except Exception:
+            pass
+
+    new_doc = _struct_to_es_doc(
+        payload=payload,
+        compile_kwd=base_doc.get("compile_kwd"),
+        doc_id=base_doc.get("doc_id"),
+        chunk_ids=chunk_ids,
+        vec=vec,
+        kind=kind,
+        src_field=src_field,
+        target_field=target_field,
+    )
+    if preserve_id and base_doc.get("id"):
+        new_doc["id"] = base_doc["id"]
+    # Endpoints must not change on a merge; the template ids are carried over too (presumably not set by _struct_to_es_doc, which gets no template id here).
+    for kwd in ("from_entity_kwd", "to_entity_kwd", "compilation_template_ids"):
+        if kwd in base_doc and base_doc[kwd]:
+            new_doc[kwd] = base_doc[kwd]
+    return new_doc
+
+
+async def _struct_reembed_payload(payload: dict, embd_mdl):
+    """Re-encode a merged payload's description with embd_mdl and return the vector."""
+    text = _struct_payload_description(payload)
+    vecs = await _struct_embed(embd_mdl, [text])
+    return vecs[0] if vecs else None
+
+
+async def _struct_local_dedup(
+    docs: list[dict],
+    chat_mdl,
+    embd_mdl,
+    similarity_threshold: float,
+) -> tuple[list[dict], int]:
+    """Single-pass dedup inside ``docs``, bucketed by ``_struct_filter_key``. Returns (deduped, dropped_count)."""
+    from sklearn.metrics.pairwise import cosine_similarity
+
+    groups: dict = {}
+    order: list = []
+    for doc in docs:
+        key = _struct_filter_key(doc)
+        if key not in groups:
+            groups[key] = []
+            order.append(key)
+        groups[key].append(doc)
+
+    dropped = 0
+    deduped: list[dict] = []
+
+    for key in order:
+        kept: list[dict] = []
+        for incoming in groups[key]:
+            _, inc_vec = _struct_doc_vec(incoming)
+            if inc_vec is None or len(inc_vec) == 0 or not kept:  # explicit test: truthiness of an ndarray raises
+                kept.append(incoming)
+                continue
+            kept_with_vecs = []
+            for kd in kept:
+                _, kv = _struct_doc_vec(kd)
+                if kv is not None and len(kv) > 0:  # an empty vector cannot be compared
+                    kept_with_vecs.append((kd, kv))
+            if not kept_with_vecs:
+                kept.append(incoming)
+                continue
+            sims = cosine_similarity([list(inc_vec)], [list(v) for _, v in kept_with_vecs])[0]
+            sims_list = sims.tolist() if hasattr(sims, "tolist") else list(sims)
+            best_idx = max(range(len(sims_list)), key=lambda i: sims_list[i])
+            if sims_list[best_idx] < similarity_threshold:
+                kept.append(incoming)
+                continue
+            existing = kept_with_vecs[best_idx][0]
+            merged_payload = await _struct_merge_pair(existing, incoming, chat_mdl)
+            if merged_payload is None:
+                kept.append(incoming)
+                continue
+            merged_payload = _struct_apply_merge_invariants(existing, merged_payload)
+            merged_chunk_ids = _struct_union_chunk_ids(
+                existing.get("source_chunk_ids"),
+                incoming.get("source_chunk_ids"),
+            )
+            new_vec = await _struct_reembed_payload(merged_payload, embd_mdl)
+            if new_vec is None:
+                # Re-embed failed: keep existing, drop incoming silently.
+                dropped += 1
+                continue
+            rebuilt = _struct_rebuild_es_doc(
+                merged_payload,
+                existing,
+                new_vec,
+                merged_chunk_ids,
+                preserve_id=True,
+            )
+            # Replace the kept entry that matched.
+            for i, kd in enumerate(kept):
+                if kd is existing:
+                    kept[i] = rebuilt
+                    break
+            dropped += 1
+        deduped.extend(kept)
+
+    return deduped, dropped
+
+
+async def _struct_es_dedup_one(
+    doc: dict,
+    chat_mdl,
+    embd_mdl,
+    tenant_id: str,
+    kb_id: str,
+    similarity_threshold: float,
+) -> str:
+    """Persist a single doc into ES with merge-or-insert semantics.
+
+    Returns one of: 'inserted', 'updated', 'skipped'.
+    """
+    from common import settings
+    from rag.nlp import search as _rag_search
+    from common.doc_store.doc_store_base import MatchDenseExpr, OrderByExpr
+
+    index = _rag_search.index_name(tenant_id)
+
+    condition = {
+        "compile_kwd": [doc["compile_kwd"]],
+        "doc_id": [doc["doc_id"]],
+    }
+    if doc.get("knowledge_graph_kwd"):
+        condition["knowledge_graph_kwd"] = [doc["knowledge_graph_kwd"]]
+    if doc.get("from_entity_kwd"):
+        condition["from_entity_kwd"] = [doc["from_entity_kwd"]]
+    if doc.get("to_entity_kwd"):
+        condition["to_entity_kwd"] = [doc["to_entity_kwd"]]
+    # KNN dedup must stay within the same template — two templates can
+    # produce identical-looking entities (e.g. two "list" kinds extracting
+    # the same headline) but they live on independent tabs in the UI.
+    incoming_template = _struct_doc_template_id(doc)
+    if incoming_template:
+        condition["compilation_template_ids"] = [incoming_template]
+
+    vec_field, vec = _struct_doc_vec(doc)
+    if not vec_field or vec is None:
+        await thread_pool_exec(settings.docStoreConn.insert, [doc], index, kb_id)
+        return "inserted"
+
+    match_expr = MatchDenseExpr(
+        vector_column_name=vec_field,
+        embedding_data=list(vec),
+        embedding_data_type="float",
+        distance_type="cosine",
+        topn=1,
+        extra_options={"similarity": similarity_threshold},
+    )
+    select_fields = [
+        "id",
+        "content_with_weight",
+        "source_chunk_ids",
+        "knowledge_graph_kwd",
+        "compile_kwd",
+        "doc_id",
+        "from_entity_kwd",
+        "to_entity_kwd",
+    ]
+    try:
+        res = await thread_pool_exec(
+            settings.docStoreConn.search,
+            select_fields,
+            [],
+            condition,
+            [match_expr],
+            OrderByExpr(),
+            0,
+            1,
+            index,
+            [kb_id],
+        )
+        field_map = settings.docStoreConn.get_fields(res, select_fields)
+    except Exception:
+        logging.exception("merge_compiled_structures: ES KNN search failed; inserting as new")
+        await thread_pool_exec(settings.docStoreConn.insert, [doc], index, kb_id)
+        return "inserted"
+
+    if not field_map:
+        await thread_pool_exec(settings.docStoreConn.insert, [doc], index, kb_id)
+        return "inserted"
+
+    old_id, old_doc = next(iter(field_map.items()))
+    old_doc = dict(old_doc)
+    old_doc.setdefault("id", old_id)
+
+    merged_payload = await _struct_merge_pair(old_doc, doc, chat_mdl)
+    if merged_payload is None:
+        await thread_pool_exec(settings.docStoreConn.insert, [doc], index, kb_id)
+        return "inserted"
+
+    merged_payload = _struct_apply_merge_invariants(old_doc, merged_payload)
+    merged_chunk_ids = _struct_union_chunk_ids(
+        old_doc.get("source_chunk_ids"),
+        doc.get("source_chunk_ids"),
+    )
+    new_vec = await _struct_reembed_payload(merged_payload, embd_mdl)
+    if new_vec is None:
+        return "skipped"
+
+    rebuilt = _struct_rebuild_es_doc(
+        merged_payload,
+        old_doc,
+        new_vec,
+        merged_chunk_ids,
+        preserve_id=True,
+    )
+    update_fields = {k: v for k, v in rebuilt.items() if k != "id"}
+    try:
+        await thread_pool_exec(
+            settings.docStoreConn.update,
+            {"id": old_id},
+            update_fields,
+            index,
+            kb_id,
+        )
+        return "updated"
+    except Exception:
+        logging.exception("merge_compiled_structures: ES update failed for id %s", old_id)
+        return "skipped"
+
+
+def _struct_graph_row_id(
+    doc_id: str,
+    compile_kwd: str,
+    compilation_template_id: str | None = None,
+) -> str:
+    """Stable id per (doc, compile_kwd, template). Without the template
+    suffix, two templates sharing a compile_kwd (e.g. both ``list``)
+    would overwrite each other's per-doc graph JSON row."""
+    tpl_part = compilation_template_id or ""
+    return xxhash.xxh64(
+        f"{doc_id}:structure_graph:{compile_kwd}:{tpl_part}".encode(
+            "utf-8",
+            "surrogatepass",
+        ),
+    ).hexdigest()
+
+
+async def _struct_rebuild_graph_json(
+    tenant_id: str,
+    kb_id: str,
+    doc_id: str,
+    compile_kwd: str,
+    compilation_template_id: str | None = None,
+) -> dict:
+    from common import settings
+    from rag.nlp import search as _rag_search
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    index = _rag_search.index_name(tenant_id)
+    fields = ["content_with_weight", "knowledge_graph_kwd", "source_chunk_ids"]
+    condition: dict = {
+        "doc_id": [doc_id],
+        "compile_kwd": [compile_kwd],
+        "knowledge_graph_kwd": ["entity", "relation"],
+    }
+    if compilation_template_id:
+        condition["compilation_template_ids"] = [compilation_template_id]
+    res = await thread_pool_exec(
+        settings.docStoreConn.search,
+        fields,
+        [],
+        condition,
+        [],
+        OrderByExpr(),
+        0,
+        10000,
+        index,
+        [kb_id],
+    )
+    rows = settings.docStoreConn.get_fields(res, fields)
+    entities: list[dict] = []
+    relations: list[dict] = []
+    for row in rows.values():
+        payload = _struct_load_payload(row)
+        if row.get("knowledge_graph_kwd") == "relation":
+            relation = _struct_graph_relation(payload)
+            if relation:
+                relations.append(relation)
+        else:
+            entity = _struct_graph_entity(payload, row.get("source_chunk_ids"))
+            if entity:
+                entities.append(entity)
+
+    return {
+        "entities": _struct_merge_graph_entities(entities),
+        "relations": relations,
+    }
+
+
+async def _struct_upsert_graph_json(
+    graph: dict,
+    tenant_id: str,
+    kb_id: str,
+    doc_id: str,
+    compile_kwd: str,
+    compilation_template_id: str | None = None,
+) -> None:
+    from common import settings
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    row_id = _struct_graph_row_id(doc_id, compile_kwd, compilation_template_id)
+    row = {
+        "id": row_id,
+        "content_with_weight": json.dumps(graph, ensure_ascii=False),
+        "compile_kwd": compile_kwd,
+        "knowledge_graph_kwd": "graph",
+        "doc_id": doc_id,
+        "kb_id": kb_id,
+        "available_int": 0,
+    }
+    if compilation_template_id:
+        row["compilation_template_ids"] = [compilation_template_id]
+    old = await thread_pool_exec(settings.docStoreConn.get, row_id, index, [kb_id])
+    if old:
+        await thread_pool_exec(
+            settings.docStoreConn.update,
+            {"id": row_id},
+            {k: v for k, v in row.items() if k != "id"},
+            index,
+            kb_id,
+        )
+    else:
+        await thread_pool_exec(settings.docStoreConn.insert, [row], index, kb_id)
+
+
+async def rebuild_structure_graph_json(
+    tenant_id: str,
+    kb_id: str,
+    doc_id: str,
+    compile_kwd: str,
+    compilation_template_id: str | None = None,
+) -> dict:
+    """Rebuild and persist the compact document-scoped structure graph,
+    scoped to one (doc, compile_kwd, template_id) triple."""
+    graph = await _struct_rebuild_graph_json(
+        tenant_id,
+        kb_id,
+        doc_id,
+        compile_kwd,
+        compilation_template_id,
+    )
+    await _struct_upsert_graph_json(
+        graph,
+        tenant_id,
+        kb_id,
+        doc_id,
+        compile_kwd,
+        compilation_template_id,
+    )
+    return graph
+
+
+# ---------------------------------------------------------------------------
+# Chain-shape validation for ``list`` / ``timeline`` kinds.
+#
+# Both kinds model a strict linear chain of entities (one predecessor,
+# one successor, no cycles). The per-chunk extractor is happy to emit
+# branches / cycles when the source text supports multiple readings, so
+# we validate the relation set post-extraction and ask the LLM to pick
+# the correct chain out of the offenders. On any failure (timeout,
+# exception, malformed LLM output) the validator returns the input
+# untouched — correction is best-effort.
+# ---------------------------------------------------------------------------
+
+# Kinds whose relations must form a strict linear chain.
+CHAIN_KINDS: tuple[str, ...] = ("list", "timeline")
+
+# Max source-chunk text length passed to the LLM in the correction prompt.
+_CHAIN_CORRECTION_MAX_CHUNK_CHARS = 8196
+_CHAIN_CORRECTION_MAX_CHUNKS = 12
+
+
+CHAIN_CORRECTION_PROMPT = """You are correcting an extracted {kind}-kind structure.
+
+Constraint: the relations must form a strict linear chain — every entity has
+at most one predecessor and at most one successor, and there must be no
+cycle. The relations below were flagged by an automated detector as
+violating this constraint. Each one carries the issue that was detected.
+
+Bad relations (review and keep only those supported by the source text):
+{bad_relations_json}
+
+Source chunks the relations were extracted from:
+{source_chunks_text}
+
+Your task: from the bad relations above, pick the subset that should be
+kept. Drop the rest. Do not invent new relations. Use only ``from`` and
+``to`` slugs that appear verbatim in the bad-relations list. The result
+must satisfy the strict-chain constraint.
+
+Return ONLY a JSON object with this exact shape (no markdown fences, no
+commentary):
+{{
+  "keep": [
+    {{"from": "<slug>", "to": "<slug>"}},
+    ...
+  ]
+}}
+"""
+
+
+def _chain_extract_edge(doc: dict) -> tuple[str, str] | None:
+    """Return ``(from_slug, to_slug)`` for a relation doc, or None."""
+    if doc.get("knowledge_graph_kwd") != "relation":
+        return None
+    src = doc.get("from_entity_kwd")
+    tgt = doc.get("to_entity_kwd")
+    if isinstance(src, str) and isinstance(tgt, str) and src.strip() and tgt.strip():
+        return src.strip(), tgt.strip()
+    # Fallback: parse the payload — older relation docs may not have the
+    # *_entity_kwd columns set if the upstream extractor was permissive.
+    try:
+        payload = json.loads(doc.get("content_with_weight") or "{}")
+    except Exception:
+        return None
+    if not isinstance(payload, dict):
+        return None
+    for src_key, tgt_key in (("source", "target"), ("from", "to"), ("src", "tgt")):
+        s = payload.get(src_key)
+        t = payload.get(tgt_key)
+        if isinstance(s, str) and isinstance(t, str) and s.strip() and t.strip():
+            return s.strip(), t.strip()
+    return None
+
+
+def _chain_detect_violations(
+    edges: list[tuple[str, str]],
+) -> dict[tuple[str, str], list[str]]:
+    """Walk the edge list once and return ``{edge: [issue_strings]}`` for
+    every edge involved in any of:
+
+    * **self-loop** — ``from == to``.
+    * **fan-out** — multiple edges share the same ``from``.
+    * **fan-in** — multiple edges share the same ``to``.
+    * **cycle** — the edge participates in a directed cycle (size ≥ 2).
+
+    Repeated identical edges are collapsed first; edges with no issues are absent from the result dict.
+    """
+    issues: dict[tuple[str, str], list[str]] = {}
+
+    def _add(edge: tuple[str, str], reason: str) -> None:
+        issues.setdefault(edge, []).append(reason)
+
+    edges = list(dict.fromkeys(edges))  # collapse repeats (order kept) so a duplicated edge is not its own fan-out/fan-in
+    out_groups: dict[str, list[tuple[str, str]]] = {}
+    in_groups: dict[str, list[tuple[str, str]]] = {}
+    for e in edges:
+        if e[0] == e[1]:
+            _add(e, "self-loop")
+        out_groups.setdefault(e[0], []).append(e)
+        in_groups.setdefault(e[1], []).append(e)
+
+    for node, group in out_groups.items():
+        if len(group) > 1:
+            siblings = sorted({g[1] for g in group})
+            reason = f"fan-out from '{node}' (also points to {siblings})"
+            for e in group:
+                _add(e, reason)
+    for node, group in in_groups.items():
+        if len(group) > 1:
+            siblings = sorted({g[0] for g in group})
+            reason = f"fan-in to '{node}' (also reached from {siblings})"
+            for e in group:
+                _add(e, reason)
+
+    # Cycle detection — Tarjan SCC. Any SCC of size ≥ 2 is a cycle; any
+    # self-loop already caught above is its own size-1 SCC and is
+    # excluded here.
+    adj: dict[str, list[str]] = {}
+    nodes: set[str] = set()
+    for src, tgt in edges:
+        nodes.add(src)
+        nodes.add(tgt)
+        adj.setdefault(src, []).append(tgt)
+
+    index_counter = [0]
+    stack: list[str] = []
+    on_stack: set[str] = set()
+    index: dict[str, int] = {}
+    lowlink: dict[str, int] = {}
+    sccs: list[set[str]] = []
+
+    def _strongconnect(v: str) -> None:
+        index[v] = index_counter[0]
+        lowlink[v] = index_counter[0]
+        index_counter[0] += 1
+        stack.append(v)
+        on_stack.add(v)
+        for w in adj.get(v, ()):
+            if w not in index:
+                _strongconnect(w)
+                lowlink[v] = min(lowlink[v], lowlink[w])
+            elif w in on_stack:
+                lowlink[v] = min(lowlink[v], index[w])
+        if lowlink[v] == index[v]:
+            comp: set[str] = set()
+            while True:
+                w = stack.pop()
+                on_stack.discard(w)
+                comp.add(w)
+                if w == v:
+                    break
+            if len(comp) >= 2:
+                sccs.append(comp)
+
+    for n in nodes:
+        if n not in index:
+            try:
+                _strongconnect(n)
+            except RecursionError:
+                # Pathologically deep relation graphs — skip cycle
+                # detection rather than crashing the whole flush.
+                logging.warning("chain validate: cycle detection hit recursion limit")
+                break
+
+    for comp in sccs:
+        for src, tgt in edges:
+            if src in comp and tgt in comp:
+                _add((src, tgt), f"cycle within {sorted(comp)}")
+
+    return issues
+
+
+def _chain_gather_chunk_text(
+    bad_docs: list[dict],
+    chunks_by_id: dict[str, str],
+) -> list[tuple[str, str]]:
+    """Collect (chunk_id, text) pairs for the LLM prompt — deduplicated,
+    capped at ``_CHAIN_CORRECTION_MAX_CHUNKS`` chunks, each trimmed to
+    ``_CHAIN_CORRECTION_MAX_CHUNK_CHARS`` characters."""
+    seen: set[str] = set()
+    out: list[tuple[str, str]] = []
+    for doc in bad_docs:
+        for cid in doc.get("source_chunk_ids") or []:
+            if not isinstance(cid, str) or cid in seen:
+                continue
+            seen.add(cid)
+            text = chunks_by_id.get(cid)
+            if not isinstance(text, str) or not text.strip():
+                continue
+            out.append((cid, text[:_CHAIN_CORRECTION_MAX_CHUNK_CHARS]))
+            if len(out) >= _CHAIN_CORRECTION_MAX_CHUNKS:
+                return out
+    return out
+
+
+async def validate_and_correct_chain(
+    docs: list[dict],
+    chunks_by_id: dict[str, str],
+    chat_mdl,
+    kind: str,
+    callback=None,
+) -> list[dict]:
+    """Ensure the chain-shape constraint on ``docs`` (a flush-time mixed
+    list of entity and relation docs). On finding a violation we ask the
+    LLM to pick the subset of the offending relations that should be
+    kept; the dropped offenders are removed from the returned list.
+
+    Best-effort: any exception during detection or LLM call results in
+    ``docs`` being returned verbatim, so a misbehaving model can never
+    block the merge phase. Callers are still responsible for wrapping
+    the call in their own timeout if they want a hard wall.
+    """
+    if not docs or kind not in CHAIN_KINDS:
+        return docs
+
+    try:
+        # Bucket: relations keyed by edge for later removal.
+        edge_to_docs: dict[tuple[str, str], list[dict]] = {}
+        all_edges: list[tuple[str, str]] = []
+        for d in docs:
+            e = _chain_extract_edge(d)
+            if e is None:
+                continue
+            edge_to_docs.setdefault(e, []).append(d)
+            all_edges.append(e)
+
+        violations = _chain_detect_violations(all_edges)
+        if not violations:
+            return docs
+
+        bad_edges = list(violations.keys())
+        bad_docs: list[dict] = []
+        for e in bad_edges:
+            bad_docs.extend(edge_to_docs.get(e, ()))
+
+        bad_relations_repr = [{"from": e[0], "to": e[1], "issue": "; ".join(reasons)} for e, reasons in violations.items()]
+        chunk_pairs = _chain_gather_chunk_text(bad_docs, chunks_by_id)
+        source_chunks_text = "\n\n".join(f"[{cid}]\n{text}" for cid, text in chunk_pairs) or "(no source chunks available)"
+        prompt = CHAIN_CORRECTION_PROMPT.format(
+            kind=kind,
+            bad_relations_json=json.dumps(bad_relations_repr, ensure_ascii=False),
+            source_chunks_text=source_chunks_text,
+        )
+        if callable(callback):
+            try:
+                callback(msg=f"chain validation: {len(bad_edges)} flagged for LLM correction")
+            except Exception:
+                pass
+
+        res = await gen_json(
+            "You correct extracted graph relations to satisfy a strict-chain constraint.",
+            prompt,
+            chat_mdl,
+            gen_conf={"temperature": 0.0},
+        )
+    except Exception:
+        logging.exception("chain validate: detection / LLM call failed; skipping correction")
+        return docs
+
+    if not isinstance(res, dict):
+        return docs
+    keep_raw = res.get("keep")
+    if not isinstance(keep_raw, list):
+        return docs
+
+    bad_edge_set = set(bad_edges)
+    keep_set: set[tuple[str, str]] = set()
+    for item in keep_raw:
+        if not isinstance(item, dict):
+            continue
+        s = item.get("from")
+        t = item.get("to")
+        if not isinstance(s, str) or not isinstance(t, str):
+            continue
+        edge = (s.strip(), t.strip())
+        # Reject anything that wasn't in the bad set — we don't invent
+        # new relations and we don't allow the LLM to "rescue" a
+        # never-extracted edge.
+        if edge in bad_edge_set:
+            keep_set.add(edge)
+
+    if keep_set == bad_edge_set:
+        # LLM kept everything → no correction applied.
+        return docs
+
+    # Drop the bad-edge docs that the LLM didn't keep.
+    dropped_doc_ids: set[str] = set()
+    for edge in bad_edge_set - keep_set:
+        for d in edge_to_docs.get(edge, ()):
+            did = d.get("id")
+            if isinstance(did, str):
+                dropped_doc_ids.add(did)
+
+    if not dropped_doc_ids:
+        return docs
+
+    corrected = [d for d in docs if d.get("id") not in dropped_doc_ids]
+    if callable(callback):
+        try:
+            callback(msg=f"chain validation: dropped {len(dropped_doc_ids)} of {len(bad_edges)} flagged relation(s)")
+        except Exception:
+            pass
+    return corrected
+
+
+async def merge_compiled_structures(
+    docs: list[dict],
+    chat_mdl,
+    embd_mdl,
+    tenant_id: str,
+    kb_id: str,
+    similarity_threshold: float = 0.99,
+    compilation_template_id: str | None = None,
+    cancel_check: Callable[[], bool] | None = None,
+) -> dict:
+    """Merge ``docs`` (the output of ``compile_structure_from_text``) before
+    inserting them into ES.
+
+    Two phases:
+        1. **Local dedup**: bucket by (doc_id, compile_kwd, from_entity_kwd?,
+           to_entity_kwd?, template id), cosine similarity over the q_<dim>_vec
+           field via ``sklearn.metrics.pairwise.cosine_similarity``; pairs
+           above ``similarity_threshold`` go through ``_struct_merge_pair``
+           (LLM-judged). On a duplicate verdict the surviving entry is
+           rebuilt from the merged payload (union of ``source_chunk_ids``,
+           re-embedded, src/target preserved on relations).
+        2. **ES dedup**: for each surviving doc, KNN-search ES with the same
+           filter via ``MatchDenseExpr`` (top1, similarity ≥ threshold). On a
+           hit + LLM duplicate verdict, the existing ES doc is replaced
+           **by its old id** (`settings.docStoreConn.update`). Otherwise the
+           doc is inserted as new.
+
+    Args:
+        docs: list of ES-ready dicts from ``compile_structure_from_text``.
+        chat_mdl: LLMBundle for chat (used to judge duplicate-ness + emit
+            merged JSON via ``gen_json``).
+        embd_mdl: LLMBundle for embeddings (used to re-embed merged
+            descriptions before persistence).
+        tenant_id, kb_id: address the doc-store index for the current KB.
+        similarity_threshold: min cosine similarity for an LLM-judged merge.
+        compilation_template_id: fallback template id for graph rebuild keys.
+        cancel_check: optional callable returning True when the owning parse
+            task has been canceled. Checked between ES-dedup iterations so a
+            long merge can stop promptly.
+
+    Returns:
+        {"inserted": N, "updated": M, "duplicates_dropped": K, "graphs": G} summary.
+    """
+    if not docs:
+        return {"inserted": 0, "updated": 0, "duplicates_dropped": 0, "graphs": 0}
+
+    deduped, dropped = await _struct_local_dedup(
+        docs,
+        chat_mdl,
+        embd_mdl,
+        similarity_threshold,
+    )
+
+    graph_keys = {
+        (
+            str(d.get("doc_id")),
+            str(d.get("compile_kwd")),
+            _struct_doc_template_id(d) or compilation_template_id or "",
+        )
+        for d in deduped
+        if d.get("doc_id") and d.get("compile_kwd") and d.get("knowledge_graph_kwd") in ("entity", "relation")
+    }
+
+    def _raise_if_canceled() -> None:
+        if callable(cancel_check) and cancel_check():
+            raise TaskCanceledException("Task was cancelled during structure ES dedup merge")
+
+    inserted = 0
+    updated = 0
+    for d in deduped:
+        _raise_if_canceled()
+        try:
+            result = await _struct_es_dedup_one(
+                d,
+                chat_mdl,
+                embd_mdl,
+                tenant_id,
+                kb_id,
+                similarity_threshold,
+            )
+        except Exception:
+            logging.exception("merge_compiled_structures: per-doc dedup failed")
+            continue
+        if result == "inserted":
+            inserted += 1
+        elif result == "updated":
+            updated += 1
+
+    graphs = 0
+    for doc_id, compile_kwd, template_id in graph_keys:
+        _raise_if_canceled()
+        try:
+            await rebuild_structure_graph_json(
+                tenant_id,
+                kb_id,
+                doc_id,
+                compile_kwd,
+                compilation_template_id=template_id or None,
+            )
+            graphs += 1
+        except Exception:
+            logging.exception(
+                "merge_compiled_structures: graph rebuild failed for doc=%s compile_kwd=%s template=%s",
+                doc_id,
+                compile_kwd,
+                template_id,
+            )
+
+    return {
+        "inserted": inserted,
+        "updated": updated,
+        "duplicates_dropped": dropped,
+        "graphs": graphs,
+    }
+
+
+__all__ = [
+    "compile_structure_from_text",
+    "merge_compiled_structures",
+    "rebuild_structure_graph_json",
+]
diff --git a/rag/advanced_rag/knowlege_compile/wiki.py b/rag/advanced_rag/knowlege_compile/wiki.py
new file mode 100644
index 0000000000..4ce2722355
--- /dev/null
+++ b/rag/advanced_rag/knowlege_compile/wiki.py
@@ -0,0 +1,3576 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+"""WIKI compilation pipeline — MAP phase.
+
+  - Chunks come from ES (or any pre-chunked list passed in by the caller).
+    No outline-driven chunking; per-chunk byte offsets are not tracked.
+  - The LLM goes through ``rag.prompts.generator.gen_json`` (json_repair-backed).
+    Embeddings go through ``LLMBundle.encode`` via ``thread_pool_exec`` (kept
+    in the signature for symmetry with the downstream REDUCE / REFINE phases
+    even though MAP itself does not embed).
+  - Citation anchor is the source chunk id (``source_chunk_ids`` list per item), not
+    a byte position. The LLM is prompted to tag each extracted item with the
+    ``[CHUNK_ID …]`` of the chunk it came from.
+  - Resume: per-chunk extracts are persisted to ES under
+    ``compile_kwd="artifact_map_extract"`` with ``available_int=0`` and no vector
+    / token-list fields, so retrievers ignore them but downstream phases can
+    fetch them by ``doc_id`` + ``source_chunk_ids``. Re-running MAP for the same
+    ``doc_id`` skips chunks that already have an extract row.
+
+Public entry: ``wiki_map_from_chunks``.
+"""
+
+import asyncio
+import json
+import logging
+import re
+from typing import Callable, Optional
+from common.misc_utils import thread_pool_exec
+from common.token_utils import num_tokens_from_string
+from rag.prompts.generator import gen_json, message_fit_in
+
+import xxhash as _xxhash
+
+from ._common import (
+    build_chunk_batches as _build_chunk_batches,
+    bulk_dedup_items as _bulk_dedup_items,
+    ensure_llm_bundle as _ensure_llm_bundle,
+    run_chunked_pipeline as _run_chunked_pipeline,
+    stable_row_id as _stable_row_id,
+)
+
+
+# Global pipeline revision. Bumping this constant invalidates every cached
+# artifact_map_extract / artifact_reduce_result / artifact_compilation_plan
+# / artifact_page_draft / artifact_page row on the next re-run. Use it
+# when a prompt or extraction schema changes in a way that should
+# invalidate prior caches.
+_WIKI_PIPELINE_REV = "v1"  # mixed into every fingerprint produced by _chunk_hash
+
+
+def _chunk_hash(content: str) -> str:
+    """Fingerprint chunk text together with the global pipeline revision.
+
+    Hashes ``content`` (falsy values count as ``""``) joined to
+    ``_WIKI_PIPELINE_REV`` with ``"|"``; returns the xxh64 hex digest, so
+    bumping the revision changes every fingerprint."""
+    return _xxhash.xxh64("|".join((content or "", _WIKI_PIPELINE_REV)).encode("utf-8", "surrogatepass")).hexdigest()
+
+
+# Tiny parser_config helpers shared with the structure pipeline. Pulled in
+# here so the MAP entity/relation schemas and rules can be driven from the
+# same ``parser_config`` shape that ``compile_structure_from_text`` uses.
+from .structure import (
+    _struct_get,
+    _struct_localize,
+)
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+WIKI_MAP_COMPILE_KWD = "artifact_map_extract"  # compile_kwd value tagging per-chunk MAP extract rows
+DEFAULT_WIKI_MAP_WORKERS = 6  # presumably the default batch concurrency — confirm at the call site
+DEFAULT_WIKI_MAP_TIMEOUT = 600  # presumably seconds per LLM call — confirm at the call site
+
+
+WIKI_MAP_SYSTEM = (  # system prompt of the MAP call; fragments are concatenated verbatim, so each must end with a space
+    "You are a knowledge extraction engine. Extract structured knowledge from the "
+    "provided document section. Return ONLY valid JSON matching the schema exactly. "
+    "Never include any text outside the JSON object. If a category has no items, use []. "
+    "Keep the chunks' original language (Chinese/English etc.) for generated data."
+)
+
+
+_DEFAULT_ENTITY_SCHEMA_BODY = (  # fallback body handed to _wiki_render_schema_body for entities
+    '      "name": "string — entity canonical name as it appears in text",\n'
+    '      "type": "string — one of: person|org|product|regulation|location|system|equipment|other",\n'
+    '      "aliases": ["string"],\n'
+    '      "source_chunk_id": "string — exact value from the chunk_id list above"'
+)
+
+_DEFAULT_RELATION_SCHEMA_BODY = (  # fallback body handed to _wiki_render_schema_body for relations
+    '      "from": "string — source entity/concept name",\n'
+    '      "to": "string — target entity/concept name",\n'
+    '      "type": "string — e.g. owns|part_of|caused_by|regulates|uses|located_in|other",\n'
+    '      "source_chunk_id": "string — exact value from the chunk_id list above"'
+)
+
+
+WIKI_MAP_USER_TEMPLATE = """\
+## Document context
+Document id: {doc_id}
+Batch contains {chunk_count} packed chunk(s). Each chunk is introduced by a
+``[CHUNK_ID <id>]`` line. The chunk_id values to choose from are:
+{chunk_id_list}
+
+## Packed chunks
+{packed_chunks}
+
+---
+
+Extract all knowledge from every chunk and return a single JSON object with this
+exact schema:
+
+{{
+  "entities": [
+    {{
+      "name": "string - entity canonical name as it appears in text",
+      "type": "string - {entity_type_rules}",
+      "aliases": ["string"],
+      "source_chunk_id": "string - exact value from the chunk_id list above"
+    }}
+  ],
+  "concepts": [
+    {{
+      "term": "string - {concept_term}",
+      "definition_excerpt": "string - {concept_definition_excerpt}",
+      "source_chunk_id": "string - exact value from the chunk_id list above"
+    }}
+  ],
+  "claims": [
+    {{
+      "statement": "string - {claim_statement}",
+      "subject": "string - {claim_subject}",
+      "confidence": "explicit",
+      "source_chunk_id": "string - exact value from the chunk_id list above"
+    }}
+  ],
+  "relations": [
+    {{
+      "from": "string - source entity/concept name",
+      "to": "string - target entity/concept name",
+      "type": "string - {relation_type_rules}",
+      "source_chunk_id": "string - exact value from the chunk_id list above"
+    }}
+  ],
+  "topics": ["string"]
+}}
+
+Rules:
+- ``source_chunk_id`` MUST be one of the chunk_id values listed above (they
+  look like ``C1``, ``C2``, …); do NOT invent new ids. Pick the chunk where
+  the item is primarily stated.
+- The ``[CHUNK_ID …]`` header lines AND the ``C1``/``C2``/… chunk tags are
+  prompt scaffolding — they are NOT part of the document content. Do NOT
+  extract them (or any other identifier-looking strings from the headers)
+  as entities, concepts, claims, or relations. Entity ``name`` / concept
+  ``term`` values must come from the human-readable chunk body only.
+- NEVER use bare hexadecimal hashes (such as ``a3f1b2c4d5e6f7a8``),
+  UUIDs, database row ids, or any other opaque identifier-looking token
+  as an entity ``name`` or concept ``term``. If you cannot find a
+  human-readable name for a candidate entity in the chunk body, drop it.
+- Concrete examples of values that are ALWAYS WRONG:
+    BAD entity: {{"name": "C1", "type": "product", "aliases": ["C1"]}}
+    BAD entity: {{"name": "C3", "type": "location"}}
+    BAD concept: {{"term": "C2"}}
+    BAD entity: {{"name": "d523a888c5b2a167", "type": "location"}}
+    BAD entity: {{"name": "41a5271858ca11f1bbb9047c16ec874f", "type": "product"}}
+  ``C1`` / ``C2`` / etc. are CHUNK TAGS, not products or locations. The
+  hex hashes are DATABASE IDS, not entities. If your candidate ``name``
+  matches any of these shapes, do not include the item in the output.
+- ``confidence`` is ``"explicit"`` (directly stated) or ``"inferred"`` (implied
+  by the text).
+- Be exhaustive — include all named entities, defined terms, and factual claims.
+- For ``concepts``, extract BOTH (a) named terms with definitions AND (b)
+  coherent thematic sub-topics that could become their own wiki page.
+- Extract ``claims`` LIBERALLY: every factual sentence about an entity is a
+  claim. Definitions, attributes, ownership, locations, dates, actions,
+  events, financial figures, regulations cited — all qualify. If you
+  extract an entity, you should usually extract one or more claims that
+  mention it. An empty ``claims`` array is almost always wrong unless the
+  chunks are pure boilerplate.
+- ``relations`` only fire when the text states an explicit link between two
+  named entities/concepts (``A owns B``, ``A is part of B``, ``A regulates B``).
+  Otherwise leave ``relations`` empty.
+- Return empty arrays ``[]`` for categories with no findings.
+- Return ONLY the JSON object, no markdown fences, no commentary.
+{custom_rules}"""
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_EXTRACT_LIST_KEYS = ("entities", "concepts", "claims", "relations")  # extract keys holding lists of dict items ("topics" holds plain strings)
+
+
+def _wiki_empty_extract() -> dict:
+    """Return a new extract dict whose five categories are all empty lists.
+
+    A fresh dict and fresh lists are built on every call, so callers may
+    mutate the result freely.
+    """
+    categories = ("entities", "concepts", "claims", "relations", "topics")
+    return {category: [] for category in categories}
+
+
+# ---- parser_config-driven schema rendering --------------------------------
+#
+# artifact MAP's prompt previously hardcoded the entity & relation field set
+# (name/type/aliases for entities; from/to/type for relations). The shape
+# is now driven by ``parser_config.output.entities.fields`` and
+# ``parser_config.output.relations.fields`` — the same YAML-style config
+# used by ``compile_structure_from_text``. When no fields are configured
+# (or no parser_config is passed) we fall back to the original artifact
+# defaults so existing call sites keep working.
+
+
+def _wiki_render_schema_body(fields, language: str, default_body: str, *, indent: int = 6) -> str:
+    """Render the JSON body for one item in the entity/relation schema.
+
+    Emits one ``"<field>": <placeholder>`` line per usable entry of ``fields``:
+    a dict with a non-blank string ``name`` not seen before. The placeholder
+    is ``["string"]`` / ``0`` / ``0.0`` / ``false`` for the types ``list`` /
+    ``int`` / ``float`` / ``bool``; any other type gives ``"string — <desc>"``
+    (plain ``"string"`` without a description), where the description is
+    first passed through ``_struct_localize`` with ``language``. Each line is
+    indented by ``indent`` spaces. A canonical ``source_chunk_id`` line is
+    always appended last; a user-supplied field of that name is ignored.
+
+    Falls back to ``default_body`` when ``fields`` is empty or only contains
+    invalid entries.
+    """
+    if not fields:
+        return default_body
+
+    pad = " " * indent
+    lines: list[str] = []
+    seen: set[str] = set()
+    for f in fields:
+        if not isinstance(f, dict):
+            continue
+        name = f.get("name") or ""
+        name = name.strip() if isinstance(name, str) else ""
+        if not name or name in seen or name == "source_chunk_id":
+            # Skip duplicates and any user-supplied source_chunk_id — we
+            # always append our canonical one below.
+            continue
+        seen.add(name)
+
+        ftype = f.get("type", "str")
+        desc = _struct_localize(f.get("description", ""), language)
+        if ftype == "list":
+            placeholder = '["string"]'
+        elif ftype == "int":
+            placeholder = "0"
+        elif ftype == "float":
+            placeholder = "0.0"
+        elif ftype == "bool":
+            placeholder = "false"
+        else:
+            if desc:
+                # Flatten newlines and swap curly braces for parentheses so the
+                # description stays on one line of the prompt's JSON layout.
+                safe = desc.replace("\n", " ").replace("{", "(").replace("}", ")").strip()
+                placeholder = f'"string — {safe}"'
+            else:
+                placeholder = '"string"'
+        lines.append(f'{pad}"{name}": {placeholder}')
+
+    if not lines:
+        return default_body
+
+    lines.append(f'{pad}"source_chunk_id": "string — exact value from the chunk_id list above"')
+    return ",\n".join(lines)
+
+
+def _wiki_build_custom_rules(parser_config, language: str) -> str:
+    """Render the ``guideline`` entity/relation rules of ``parser_config``.
+
+    Each non-empty rule text becomes a ``## ...`` headed section; sections
+    are separated by a blank line and the result is wrapped in single
+    newlines. Returns ``""`` when ``parser_config`` is not a dict or when
+    neither rule is set, so the prompt gains no stray blank line.
+    """
+    if not isinstance(parser_config, dict):
+        return ""
+
+    guideline = _struct_get(parser_config, "guideline", default={}) or {}
+    headed_keys = (
+        ("## Entity extraction rules (from knowledge base config):\n", "rules_for_entities"),
+        ("## Relation extraction rules (from knowledge base config):\n", "rules_for_relations"),
+    )
+
+    sections: list[str] = []
+    for heading, key in headed_keys:
+        text = _struct_localize(_struct_get(guideline, key), language)
+        if text:
+            sections.append(heading + text)
+
+    return "\n" + "\n\n".join(sections) + "\n" if sections else ""
+
+
+def _wiki_template_fields(parser_config, section: str) -> list:
+    """Return the ``fields`` value ``_struct_get`` finds under ``section`` if it is a list, else ``[]``."""
+    if not isinstance(parser_config, dict):
+        return []
+    candidate = _struct_get(_struct_get(parser_config, section, default={}) or {}, "fields", default=[]) or []
+    return candidate if isinstance(candidate, list) else []
+
+
+def _wiki_type_rules(fields: list) -> str:
+    """Render each field's ``type`` with its optional description and rule.
+
+    Non-dict entries and entries without a non-blank string ``type`` are
+    skipped. Returns the rendered lines joined by newlines ("" if none).
+    """
+    def _clean(value) -> str:
+        return value.strip() if isinstance(value, str) else ""
+
+    lines: list[str] = []
+    for field in fields:
+        typ = _clean(field.get("type")) if isinstance(field, dict) else ""
+        if not typ:
+            continue
+        lines.append(f"type: {typ}")
+        for label in ("description", "rule"):
+            if text := _clean(field.get(label)):
+                lines.append(f"  - {label}: {text}")
+    return "\n".join(lines)
+
+
+def _wiki_pipe_join(fields: list, key: str) -> str:
+    """Join the non-blank string values found under ``key`` with ``"|"``.
+
+    Non-dict entries and values that are not strings are ignored; each
+    kept value is whitespace-stripped. Order follows ``fields``.
+    """
+    # Lazy pipeline: pick values -> keep strings -> strip -> drop blanks.
+    candidates = (f.get(key) for f in fields if isinstance(f, dict))
+    stripped = (c.strip() for c in candidates if isinstance(c, str))
+    return "|".join(s for s in stripped if s)
+
+
+def _wiki_colon_join(fields: list, left_key: str, right_key: str) -> str:
+    """Render ``left:right`` pairs, one per line, from the dict entries.
+
+    Both values are stripped (non-strings count as ""); an entry is kept
+    when at least one side is non-empty. Non-dict entries are skipped.
+    """
+    def _text(entry: dict, key: str) -> str:
+        raw = entry.get(key)
+        return raw.strip() if isinstance(raw, str) else ""
+
+    pairs = [(_text(f, left_key), _text(f, right_key)) for f in fields if isinstance(f, dict)]
+    return "\n".join(f"{left}:{right}" for left, right in pairs if left or right)
+
+
+def _wiki_named_field_description(fields: list, name: str) -> str:
+    """Find the description configured for the field called ``name``.
+
+    Per dict entry, in order: the stripped ``description`` of an entry whose
+    lower-cased ``name`` matches, else the stripped string stored under the
+    key ``name`` itself (legacy shape). Returns "" when nothing is found.
+    """
+    def _clean(value) -> str:
+        return value.strip() if isinstance(value, str) else ""
+
+    for entry in (f for f in fields if isinstance(f, dict)):
+        if _clean(entry.get("name")).lower() == name and (text := _clean(entry.get("description"))):
+            return text
+        if legacy := _clean(entry.get(name)):
+            return legacy
+    return ""
+
+
+def _wiki_template_custom_rules(parser_config) -> str:
+    """Return the stripped ``global_rules`` string of ``parser_config``, or "" if unavailable."""
+    rules = parser_config.get("global_rules") if isinstance(parser_config, dict) else None
+    # Anything other than a string (missing key, list, ...) counts as "no rules".
+    return rules.strip() if isinstance(rules, str) else ""
+
+
+def _wiki_build_user_prompt(
+    *,
+    parser_config,
+    language: str,
+    doc_id,
+    chunk_count: int,
+    chunk_id_list: str,
+    packed_chunks: str,
+) -> str:
+    """Fill ``WIKI_MAP_USER_TEMPLATE``: type rules, concept/claim hints and custom rules
+    are taken from ``parser_config`` where set, else from the built-in defaults."""
+    ent_fields = _wiki_template_fields(parser_config, "entity")  # current shape: top-level entity/relation/concept/claim sections
+    rel_fields = _wiki_template_fields(parser_config, "relation")
+    concept_fields = _wiki_template_fields(parser_config, "concept")
+    claim_fields = _wiki_template_fields(parser_config, "claim")
+    entity_type_rules = _wiki_type_rules(ent_fields)
+    relation_type_rules = _wiki_type_rules(rel_fields)
+    concept_term = _wiki_pipe_join(concept_fields, "term")
+    concept_definition_excerpt = _wiki_colon_join(concept_fields, "term", "definition_excerpt")
+    claim_statement = _wiki_named_field_description(claim_fields, "statement")
+    claim_subject = _wiki_named_field_description(claim_fields, "subject")
+    custom_rules = _wiki_template_custom_rules(parser_config)
+
+    if isinstance(parser_config, dict):  # legacy shape: output.entities.fields / output.relations.fields
+        output = _struct_get(parser_config, "output", default={}) or {}
+        entities_cfg = _struct_get(output, "entities", default={}) or {}
+        relations_cfg = _struct_get(output, "relations", default={}) or {}
+        legacy_ent_fields = _struct_get(entities_cfg, "fields", default=[]) or []
+        legacy_rel_fields = _struct_get(relations_cfg, "fields", default=[]) or []
+        if not entity_type_rules and legacy_ent_fields:  # consulted only when the current-shape section gave nothing
+            entity_type_rules = _wiki_render_schema_body(  # NOTE(review): yields a multi-line body for a one-line "type" slot — confirm intended
+                legacy_ent_fields,
+                language,
+                _DEFAULT_ENTITY_SCHEMA_BODY,
+            )
+        if not relation_type_rules and legacy_rel_fields:
+            relation_type_rules = _wiki_render_schema_body(
+                legacy_rel_fields,
+                language,
+                _DEFAULT_RELATION_SCHEMA_BODY,
+            )
+
+    if not entity_type_rules:  # built-in defaults for every slot that is still empty
+        entity_type_rules = "person|org|product|regulation|location|system|equipment|other"
+    if not relation_type_rules:
+        relation_type_rules = "include|ordered|owns|part_of|caused_by|regulates|uses|located_in|other"
+    if not concept_term:
+        concept_term = "named term or topic"
+    if not concept_definition_excerpt:
+        concept_definition_excerpt = "short definition excerpt from the source text"
+    if not claim_statement:
+        claim_statement = "factual statement"
+    if not claim_subject:
+        claim_subject = "entity or concept that the claim is about"
+    if not custom_rules:  # ``global_rules`` wins; the guideline rules are only the fallback
+        custom_rules = _wiki_build_custom_rules(parser_config, language)
+
+    return WIKI_MAP_USER_TEMPLATE.format(
+        doc_id=doc_id,
+        chunk_count=chunk_count,
+        chunk_id_list=chunk_id_list,
+        packed_chunks=packed_chunks,
+        entity_type_rules=entity_type_rules,
+        relation_type_rules=relation_type_rules,
+        concept_term=concept_term,
+        concept_definition_excerpt=concept_definition_excerpt,
+        claim_statement=claim_statement,
+        claim_subject=claim_subject,
+        custom_rules=custom_rules,
+    )
+
+
+def _wiki_pick_chunk_text(chunk: dict) -> str:  # first truthy of text / content_with_weight / content; "" unless it is a str
+    picked = next(filter(None, map(chunk.get, ("text", "content_with_weight", "content"))), "")
+    return picked if isinstance(picked, str) else ""
+
+
+# Matches a bare 16-char lowercase-hex token with no ASCII letter or digit
+# directly on either side — the shape of an xxh64 hexdigest (chunk_id / row id).
+_HEX16_TOKEN_RE = re.compile(r"(?<![0-9a-zA-Z])[0-9a-f]{16}(?![0-9a-zA-Z])")
+# Same boundary rule for the 32-char doc-id / uuid-without-dashes pattern.
+_HEX32_TOKEN_RE = re.compile(r"(?<![0-9a-zA-Z])[0-9a-f]{32}(?![0-9a-zA-Z])")
+
+
+def _wiki_scrub_known_ids(text: str, ids_to_remove) -> str:
+    """Remove store identifiers from chunk text before LLM extraction.
+
+    Some chunkers embed the chunk_id / doc_id into the body (header, footer,
+    breadcrumb); left in place, the extraction LLM tends to report the hash
+    as an entity name. Two passes are applied:
+
+      1. Delete every literal occurrence of each non-empty string in
+         ``ids_to_remove`` (other values are ignored; ``None`` is allowed).
+      2. Delete any remaining standalone 16-hex or 32-hex token.
+
+    Falsy ``text`` is returned unchanged.
+    """
+    if not text:
+        return text
+    scrubbed = text
+    known_ids = (i for i in (ids_to_remove or ()) if i and isinstance(i, str))
+    for known in known_ids:
+        scrubbed = scrubbed.replace(known, "")
+    for token_re in (_HEX16_TOKEN_RE, _HEX32_TOKEN_RE):
+        scrubbed = token_re.sub("", scrubbed)
+    return scrubbed
+
+
+def _wiki_format_batch_prompt(packed: list[dict]) -> tuple[str, list[str]]:
+    """Render the ``[CHUNK_ID …]``-labelled body for a batch.
+
+    Returns ``(body_text, labels)``: each entry contributes its header line
+    plus ``text``; blocks are blank-line separated and labels keep batch order.
+    """
+    blocks = [(e["label"], f"[CHUNK_ID {e['label']}]\n{e['text']}") for e in packed]
+    return "\n\n".join(block for _, block in blocks), [label for label, _ in blocks]
+
+
+def _wiki_unwrap_extract(res) -> dict:
+    """Coerce LLM JSON to the canonical 5-key shape with defaulted lists.
+
+    List categories keep only dict items and ``topics`` only non-blank strings;
+    non-dict input or non-list values leave the empty default in place."""
+    shaped = _wiki_empty_extract()
+    payload = res if isinstance(res, dict) else {}
+    for category in _EXTRACT_LIST_KEYS:
+        if isinstance(items := payload.get(category), list):
+            shaped[category] = [entry for entry in items if isinstance(entry, dict)]
+    if isinstance(raw_topics := payload.get("topics"), list):
+        shaped["topics"] = [topic for topic in raw_topics if isinstance(topic, str) and topic.strip()]
+    return shaped
+
+
+# Whole-string match for values that must never be a name / term / subject / relation endpoint:
+#   - chunk tag scaffolding: C1, C2, c0001, …
+#   - bare hexadecimal hashes (xxh64 16-char, doc-id 32-char)
+#   - UUIDs with or without dashes
+# Anything matching is dropped post-extraction as defensive filtering.
+_WIKI_IDENTIFIER_LIKE_RE = re.compile(
+    r"""^\s*(
+        [Cc]\d{1,5}                       # chunk tag like C1, c0001
+        | [0-9a-fA-F]{16}                 # xxh64 hexdigest
+        | [0-9a-fA-F]{32}                 # md5 / doc_id-shaped
+        | [0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}  # UUID
+    )\s*$""",
+    re.VERBOSE,  # lets the pattern carry the inline whitespace and comments above
+)
+
+
+def _wiki_looks_like_identifier(s) -> bool:
+    """Tell whether ``s`` is a string shaped like a chunk tag, hex hash, or UUID.
+
+    Anything that is not a ``str`` is reported as ``False``."""
+    return isinstance(s, str) and _WIKI_IDENTIFIER_LIKE_RE.match(s) is not None
+
+
+def _wiki_item_has_identifier_name(key: str, item: dict) -> bool:
+    """Return True when an extracted item's display name is identifier-shaped.
+
+    The display name lives under a category-specific field:
+      - entities  → ``name``
+      - concepts  → ``term``
+      - claims    → ``subject``
+      - relations → ``from`` and ``to`` (True if either is identifier-shaped)
+    Unknown categories always yield False.
+    """
+    # Category -> item fields that carry a display name.
+    name_fields = {
+        "entities": ("name",),
+        "concepts": ("term",),
+        "claims": ("subject",),
+        "relations": ("from", "to"),
+    }
+    return any(_wiki_looks_like_identifier(item.get(field, "")) for field in name_fields.get(key, ()))
+
+
+def _wiki_resolve_chunk_ids(
+    extract: dict,
+    label_to_id: dict[str, str],
+) -> tuple[dict, dict[str, dict]]:
+    """Split a batch extract by source chunk id.
+
+    Returns:
+        merged: the input extract with each item's ``source_chunk_id`` label
+                replaced by ``chunk_ids=[real_id]``; items with an unknown
+                label or an identifier-shaped name are dropped. ``topics``
+                are copied through unchanged.
+        per_chunk: {real_chunk_id: extract-shaped dict containing only the
+                   items attributed to that chunk}. Every id in
+                   ``label_to_id`` gets an entry (possibly empty) for resume.
+    """
+    per_chunk: dict[str, dict] = {real_id: _wiki_empty_extract() for real_id in label_to_id.values()}
+    merged = _wiki_empty_extract()
+    merged["topics"] = list(extract.get("topics") or [])
+
+    dropped = 0
+    dropped_identifier = 0
+    for key in _EXTRACT_LIST_KEYS:
+        for item in extract.get(key) or []:
+            label = item.get("source_chunk_id")
+            real = label_to_id.get(label) if isinstance(label, str) else None
+            if real is None:
+                dropped += 1
+                continue
+            # Drop items whose display name is identifier-shaped — the LLM
+            # occasionally grabs prompt scaffolding (C1, C2, …) or leftover
+            # hash tokens and reports them as entities/concepts/claims. The
+            # prompt forbids this but post-filtering is the bulletproof guard.
+            if _wiki_item_has_identifier_name(key, item):
+                dropped_identifier += 1
+                continue
+            new_item = {k: v for k, v in item.items() if k != "source_chunk_id"}
+            new_item["chunk_ids"] = [real]
+            merged[key].append(new_item)
+            per_chunk[real][key].append(new_item)
+
+    if dropped:
+        logging.debug("wiki_map: dropped %d item(s) with unrecognized source_chunk_id", dropped)
+    if dropped_identifier:
+        logging.info(
+            "wiki_map: dropped %d item(s) whose name looked like a prompt-scaffolding tag or hash",
+            dropped_identifier,
+        )
+
+    return merged, per_chunk
+
+
+def _wiki_merge_extracts(extracts: list[dict]) -> dict:
+    """Combine several batch extracts into one.
+
+    The four item lists are concatenated in input order without entity-level
+    dedup (that is the REDUCE phase's job); ``topics`` are de-duplicated,
+    keeping first-seen order. Non-dict inputs are skipped."""
+    merged = _wiki_empty_extract()
+    ordered_topics: dict = {}
+    for extract in (e for e in extracts if isinstance(e, dict)):
+        for category in _EXTRACT_LIST_KEYS:
+            merged[category].extend(extract.get(category) or [])
+        # Keys already present keep their first position, so order is first-seen.
+        ordered_topics.update(dict.fromkeys(extract.get("topics") or []))
+    merged["topics"] = list(ordered_topics)
+    return merged
+
+
+def _wiki_build_resume_doc(
+    chunk_id: str,
+    doc_id: str,
+    per_chunk_extract: dict,
+    chunk_hash: str = "",
+) -> dict:
+    """Assemble the store row that records one chunk's MAP extract.
+
+    The extract is serialized (non-ASCII kept as-is) into
+    ``content_with_weight``. No ``q_<dim>_vec`` / ``content_ltks`` /
+    ``content_sm_ltks`` fields are set and ``available_int`` is 0, which is
+    meant to keep the row out of retrieval results. ``chunk_hash``, the
+    fingerprint of the chunk's content at extraction time, is stored as
+    ``chunk_hash_kwd`` so a later run can compare it.
+    """
+    serialized = json.dumps(per_chunk_extract, ensure_ascii=False)
+    owner = str(doc_id)
+    row = {
+        "id": _stable_row_id(serialized, owner, chunk_id),
+        "doc_id": owner,
+    }
+    row["compile_kwd"] = WIKI_MAP_COMPILE_KWD
+    row["source_chunk_ids"] = [chunk_id]
+    row["chunk_hash_kwd"] = chunk_hash
+    row["content_with_weight"] = serialized
+    row["available_int"] = 0
+    return row
+
+
+async def _wiki_load_resume_map(
+    doc_id: str,
+    tenant_id: str,
+    kb_id: str,
+) -> dict[str, str]:
+    """Query the doc store for chunks that already have an artifact_map_extract
+    row for this doc. Returns ``{chunk_id → chunk_hash}``; ``{}`` if the query fails.
+
+    ``chunk_hash`` may be empty for legacy rows that predate the field —
+    callers are expected to treat empty as "definitely re-MAP" (no hash to compare).
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    condition = {
+        "compile_kwd": [WIKI_MAP_COMPILE_KWD],
+        "doc_id": [str(doc_id)],
+    }
+    select_fields = ["id", "source_chunk_ids", "chunk_hash_kwd"]
+    try:
+        res = await thread_pool_exec(
+            settings.docStoreConn.search,
+            select_fields,
+            [],
+            condition,
+            [],
+            OrderByExpr(),
+            0,  # presumably the offset — confirm against docStoreConn.search
+            10000,  # presumably the row limit; NOTE(review): rows beyond it would be missed — confirm
+            index,
+            [kb_id],
+        )
+        field_map = settings.docStoreConn.get_fields(res, select_fields)
+    except Exception:
+        logging.exception("wiki_map: failed to query resume map; will re-extract all chunks")
+        return {}
+
+    seen: dict[str, str] = {}
+    for row in field_map.values():
+        src = row.get("source_chunk_ids") or []
+        hh = row.get("chunk_hash_kwd")
+        if not isinstance(hh, str):
+            hh = ""
+        if isinstance(src, list):
+            for cid in src:
+                if isinstance(cid, str) and cid:
+                    # First-write-wins is fine: if a doc has two rows for
+                    # the same chunk_id (legacy / dirty state), we treat
+                    # the first as the canonical and let the changed-hash
+                    # path or the deletion sweep clean it up later.
+                    seen.setdefault(cid, hh)
+    return seen
+
+
+async def _wiki_delete_map_rows(
+    doc_id: str,
+    chunk_ids: list[str],
+    tenant_id: str,
+    kb_id: str,
+) -> int:
+    """Delete ``artifact_map_extract`` rows for ``(doc_id, chunk_id)`` pairs.
+
+    Used by the incremental MAP path:
+      * stale rows whose chunk content has changed → re-extracted next.
+      * rows whose chunk_id is gone from the doc (chunk deleted upstream).
+
+    Returns ``len(chunk_ids)`` after a successful delete call and 0 when
+    ``chunk_ids`` is empty or the call raised; the backend may delete more
+    (e.g. duplicate rows) — we don't try to track that precisely.
+    """
+    if not chunk_ids:
+        return 0
+    from common import settings
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    condition = {
+        "compile_kwd": [WIKI_MAP_COMPILE_KWD],
+        "doc_id": [str(doc_id)],
+        "source_chunk_ids": list(chunk_ids),  # copy, so the condition does not alias the caller's list
+    }
+    try:
+        await thread_pool_exec(
+            settings.docStoreConn.delete,
+            condition,
+            index,
+            kb_id,
+        )
+    except Exception:
+        logging.exception(
+            "wiki_map: failed to delete %d stale extract row(s) for doc %s",
+            len(chunk_ids),
+            doc_id,
+        )
+        return 0  # best-effort: the failure is logged, not propagated
+    return len(chunk_ids)
+
+
+async def _wiki_persist_extracts(
+    per_chunk: dict[str, dict],
+    doc_id: str,
+    tenant_id: str,
+    kb_id: str,
+    chunk_hashes: Optional[dict[str, str]] = None,
+) -> None:
+    """Write one non-searchable doc-store row per source chunk_id.
+
+    ``chunk_hashes`` (``{chunk_id → chunk_hash}``) is stamped onto each
+    row so the next incremental run can decide whether to re-MAP.
+    Missing entries default to ''. Nothing is written when ``per_chunk``
+    is empty; an insert failure is logged and swallowed.
+    """
+    if not per_chunk:
+        return
+    from common import settings
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    hashes = chunk_hashes or {}
+    docs = [
+        _wiki_build_resume_doc(
+            chunk_id,
+            doc_id,
+            extract,
+            chunk_hash=hashes.get(chunk_id, ""),
+        )
+        for chunk_id, extract in per_chunk.items()
+        if chunk_id  # rows without a usable chunk id are not persisted
+    ]
+    if not docs:
+        return
+    try:
+        await thread_pool_exec(settings.docStoreConn.insert, docs, index, kb_id)
+    except Exception:
+        logging.exception("wiki_map: failed to persist %d resume docs", len(docs))  # best-effort: not re-raised
+
+
+# ---------------------------------------------------------------------------
+# Per-batch extraction
+# ---------------------------------------------------------------------------
+
+
+async def _wiki_extract_one_batch(
+    packed: list[dict],
+    doc_id: str,
+    chat_mdl,
+    language: str,
+    llm_timeout: int,
+    parser_config: Optional[dict] = None,
+) -> dict:
+    """Single LLM call for one packed batch. Returns the raw (label-tagged)
+    extract dict.
+
+    The user prompt is built by ``_wiki_build_user_prompt`` from
+    ``parser_config`` and ``language`` (built-in defaults apply when no
+    config is supplied). Extraction is best-effort: a timeout after
+    ``llm_timeout`` seconds or any other error is logged and yields an
+    empty extract instead of raising."""
+    body, labels = _wiki_format_batch_prompt(packed)
+    user_prompt = _wiki_build_user_prompt(
+        parser_config=parser_config,
+        language=language,
+        doc_id=doc_id,
+        chunk_count=len(packed),
+        chunk_id_list="\n".join(f"- {label}" for label in labels),
+        packed_chunks=body,
+    )
+    try:
+        res = await asyncio.wait_for(
+            gen_json(WIKI_MAP_SYSTEM, user_prompt, chat_mdl, gen_conf={"temperature": 0.1}),
+            timeout=llm_timeout,
+        )
+    except asyncio.TimeoutError:
+        logging.warning("wiki_map: batch extraction timed out after %ds (%d chunks)", llm_timeout, len(packed))
+        return _wiki_empty_extract()
+    except Exception:
+        logging.exception("wiki_map: batch extraction failed (%d chunks)", len(packed))
+        return _wiki_empty_extract()
+    return _wiki_unwrap_extract(res)
+
+
+async def _wiki_process_batch(
+    packed: list[dict],
+    batch_idx: int,
+    total_batches: int,
+    doc_id: str,
+    tenant_id: str,
+    kb_id: str,
+    chat_mdl,
+    language: str,
+    llm_timeout: int,
+    semaphore: Optional[asyncio.Semaphore],
+    callback: Optional[Callable],
+    parser_config: Optional[dict] = None,
+    chunk_hashes: Optional[dict[str, str]] = None,
+) -> dict:
+    """Process one packed batch from extraction through persistence.
+
+    Steps: call the LLM via ``_wiki_extract_one_batch``, map the prompt
+    labels back to chunk ids with ``_wiki_resolve_chunk_ids``, write the
+    per-chunk resume rows, then report progress through ``callback``.
+    When ``semaphore`` is given the whole sequence runs while holding it.
+
+    ``chunk_hashes`` (``{chunk_id → chunk_hash}``) is forwarded to the
+    persistence step so each resume row records its chunk's hash.
+    Returns the merged batch extract (empty for an empty ``packed``)."""
+    if not packed:
+        return _wiki_empty_extract()
+
+    chunk_id_by_label = {item["label"]: item["chunk_id"] for item in packed}
+
+    def _notify(batch_extract: dict) -> None:
+        # Progress reporting is best-effort; a broken callback must not
+        # fail the batch.
+        if not callback:
+            return
+        try:
+            item_total = sum(len(batch_extract.get(key) or []) for key in _EXTRACT_LIST_KEYS)
+            callback(
+                (batch_idx + 1) / max(1, total_batches),
+                f"Wiki MAP {batch_idx + 1}/{total_batches}: {item_total} items from {len(packed)} chunks",
+            )
+        except Exception:
+            logging.debug("wiki_map: progress callback failed", exc_info=True)
+
+    async def _extract_and_store() -> dict:
+        raw = await _wiki_extract_one_batch(
+            packed, doc_id, chat_mdl, language, llm_timeout, parser_config=parser_config
+        )
+        # Prompt labels are translated back to real chunk ids before persisting.
+        batch_extract, by_chunk = _wiki_resolve_chunk_ids(raw, chunk_id_by_label)
+        await _wiki_persist_extracts(
+            by_chunk, doc_id, tenant_id, kb_id, chunk_hashes=chunk_hashes
+        )
+        _notify(batch_extract)
+        return batch_extract
+
+    if semaphore is None:
+        return await _extract_and_store()
+    async with semaphore:
+        return await _extract_and_store()
+
+
+# ---------------------------------------------------------------------------
+# Public entry
+# ---------------------------------------------------------------------------
+
+
+async def wiki_map_from_chunks(
+    chunks: list[dict],
+    chat_mdl,
+    embd_mdl,
+    doc_id: str,
+    tenant_id: str,
+    kb_id: str,
+    language: str = "en",
+    max_workers: int = DEFAULT_WIKI_MAP_WORKERS,
+    llm_timeout: int = DEFAULT_WIKI_MAP_TIMEOUT,
+    callback: Optional[Callable] = None,
+    parser_config: Optional[dict] = None,
+    batch_size_cap: Optional[int] = None,
+    window_fraction: Optional[float] = None,
+) -> dict:
+    """Phase 1 (MAP) of the artifact compilation pipeline.
+
+    Packs the provided RAGFlow chunks into batches via ``split_chunks``, runs
+    one ``gen_json`` extraction call per batch in parallel (bounded by
+    ``max_workers``), then splits each batch's output back to per-chunk
+    extracts and persists them to ES as non-searchable ``artifact_map_extract``
+    rows so subsequent runs can skip chunks already processed.
+
+    Args:
+        chunks: list of dicts; each must expose ``id`` and ``text`` (with
+            ``content_with_weight`` / ``content`` accepted as fallbacks).
+        chat_mdl: LLMBundle for chat (used via ``gen_json``).
+        embd_mdl: LLMBundle for embeddings — accepted for downstream symmetry
+            with REDUCE/REFINE but **not used in MAP itself**.
+        doc_id: source document id; stamped onto every resume doc and on every
+            extracted item via ``chunk_ids``.
+        tenant_id, kb_id: address the doc-store index for resume reads + writes.
+        language: forwarded to the extraction prompt builder.
+        max_workers: maximum concurrent batches. Defaults to ``DEFAULT_WIKI_MAP_WORKERS``.
+        llm_timeout: seconds per batch extraction call.
+        callback: optional ``(progress: float, msg: str)`` progress callback.
+        parser_config: optional YAML-style config (same shape that
+            ``compile_structure_from_text`` accepts).
+            ``source_chunk_id`` field is always appended so chunk
+            attribution survives regardless of the user's schema. When
+            omitted, the built-in default artifact schema is used.
+        batch_size_cap, window_fraction: optional packing overrides,
+            forwarded unchanged to ``_build_chunk_batches``.
+
+    Returns:
+        ``{"entities", "concepts", "claims", "relations", "topics"}`` where
+        every item (except ``topics`` strings) carries a
+        ``chunk_ids=[<source chunk id>]`` field. No entity-level dedup is
+        performed here — that is the REDUCE phase's responsibility.
+        The dict also carries ``_meta`` (new / changed / unchanged /
+        deleted counts and ``had_delta``) on every return path.
+    """
+    _ = embd_mdl  # noqa: F841 — accepted for symmetry with downstream phases
+
+    if not chunks:
+        # Even with zero chunks we still want to sweep any orphaned MAP
+        # rows that point at chunks the doc no longer has — otherwise
+        # deletions never propagate.
+        prior_resume_map = await _wiki_load_resume_map(doc_id, tenant_id, kb_id)
+        if prior_resume_map:
+            await _wiki_delete_map_rows(doc_id, list(prior_resume_map.keys()), tenant_id, kb_id)
+            logging.info(
+                "wiki_map: doc %s now has zero chunks; swept %d stale extract row(s)",
+                doc_id,
+                len(prior_resume_map),
+            )
+        out = _wiki_empty_extract()
+        out["_meta"] = {
+            "doc_id": str(doc_id),
+            "new": 0,
+            "changed": 0,
+            "deleted": len(prior_resume_map),
+            "unchanged": 0,
+            "had_delta": bool(prior_resume_map),
+        }
+        return out
+
+    # Incremental decision per current chunk:
+    #
+    #   * Compute the fresh chunk hash for every chunk in this call.
+    #   * Load the prior resume map (chunk_id → hash from the last MAP).
+    #   * NEW       — chunk_id not in prior     → MAP this chunk.
+    #   * UNCHANGED — chunk_id in prior, hash matches → skip (resume).
+    #   * CHANGED   — chunk_id in prior, hash differs → delete prior
+    #                 row, then MAP this chunk.
+    #   * DELETED   — chunk_id only in prior     → delete prior row
+    #                 (chunk was removed upstream).
+    #
+    # The "resume set" handed to ``_build_chunk_batches`` is just the
+    # UNCHANGED ids — those are the only ones the packer should skip.
+    current_chunk_hashes: dict[str, str] = {}
+    for chunk in chunks:
+        cid = chunk.get("id") or chunk.get("chunk_id")
+        if not isinstance(cid, str) or not cid:
+            continue
+        text = _wiki_pick_chunk_text(chunk) or ""
+        current_chunk_hashes[cid] = _chunk_hash(text)
+
+    prior_resume_map = await _wiki_load_resume_map(doc_id, tenant_id, kb_id)
+    unchanged_ids: set[str] = set()
+    changed_ids: list[str] = []
+    new_ids: list[str] = []
+    for cid, h in current_chunk_hashes.items():
+        prior_h = prior_resume_map.get(cid)
+        if prior_h is None:
+            new_ids.append(cid)
+        elif prior_h and prior_h == h:
+            unchanged_ids.add(cid)
+        else:
+            # Empty stored hash = legacy row written before chunk_hash_kwd
+            # existed → re-MAP. Differing hash = content changed → re-MAP.
+            changed_ids.append(cid)
+    deleted_ids = [cid for cid in prior_resume_map if cid not in current_chunk_hashes]
+
+    if changed_ids or deleted_ids:
+        await _wiki_delete_map_rows(doc_id, list(set(changed_ids) | set(deleted_ids)), tenant_id, kb_id)
+
+    if unchanged_ids or changed_ids or deleted_ids or new_ids:
+        logging.info(
+            "wiki_map: doc %s — new=%d changed=%d unchanged=%d deleted=%d",
+            doc_id,
+            len(new_ids),
+            len(changed_ids),
+            len(unchanged_ids),
+            len(deleted_ids),
+        )
+
+    # Surface the incremental decisions to the orchestrator. ``had_delta``
+    # is the most useful summary: REDUCE/PLAN/REFINE can short-circuit
+    # KB-wide when no doc's MAP touched any rows on this run.
+    delta_meta = {
+        "doc_id": str(doc_id),
+        "new": len(new_ids),
+        "changed": len(changed_ids),
+        "unchanged": len(unchanged_ids),
+        "deleted": len(deleted_ids),
+        "had_delta": bool(new_ids or changed_ids or deleted_ids),
+    }
+
+    # The packer's "resume" set is the UNCHANGED ids only — NEW and
+    # CHANGED both need re-extraction.
+    resume_set = unchanged_ids
+
+    # Defensive scrub: chunkers sometimes embed the chunk_id / doc_id into
+    # the body (e.g. as a header). Without this the extraction LLM tends to
+    # grab the hash as an "entity" — see _wiki_scrub_known_ids.
+    all_known_ids: list[str] = []
+    for chunk in chunks:
+        cid = chunk.get("id") or chunk.get("chunk_id")
+        if isinstance(cid, str) and cid:
+            all_known_ids.append(cid)
+    if doc_id:
+        all_known_ids.append(str(doc_id))
+
+    prompt_overhead = num_tokens_from_string(WIKI_MAP_SYSTEM + WIKI_MAP_USER_TEMPLATE)
+    packed_batches, _info = _build_chunk_batches(
+        chunks,
+        chat_mdl,
+        prompt_overhead_tokens=prompt_overhead,
+        resume_chunk_ids=resume_set,
+        scrub_text=lambda t: _wiki_scrub_known_ids(t, all_known_ids),
+        chunk_text_picker=_wiki_pick_chunk_text,
+        batch_size_cap=batch_size_cap,
+        window_fraction=window_fraction,
+    )
+    if not packed_batches:
+        # Nothing left to extract, but stale rows may have been swept
+        # above — still report the delta so the orchestrator can see it.
+        out = _wiki_empty_extract()
+        out["_meta"] = delta_meta
+        return out
+
+    async def _process_one(batch: list[dict], bi: int, total: int) -> dict:
+        # ``_run_chunked_pipeline`` already wraps each task in the engine's
+        # semaphore, so pass ``semaphore=None`` here to avoid nesting.
+        return await _wiki_process_batch(
+            packed=batch,
+            batch_idx=bi,
+            total_batches=total,
+            doc_id=doc_id,
+            tenant_id=tenant_id,
+            kb_id=kb_id,
+            chat_mdl=chat_mdl,
+            language=language,
+            llm_timeout=llm_timeout,
+            semaphore=None,
+            callback=callback,
+            parser_config=parser_config,
+            chunk_hashes=current_chunk_hashes,
+        )
+
+    merged = await _run_chunked_pipeline(
+        packed_batches,
+        process_batch=_process_one,
+        aggregate=_wiki_merge_extracts,
+        max_workers=max_workers,
+        callback=callback,
+        log_prefix="wiki_map",
+    )
+    logging.info(
+        "wiki_map: doc %s — entities=%d concepts=%d claims=%d relations=%d topics=%d",
+        doc_id,
+        len(merged["entities"]),
+        len(merged["concepts"]),
+        len(merged["claims"]),
+        len(merged["relations"]),
+        len(merged["topics"]),
+    )
+    merged["_meta"] = delta_meta
+    return merged
+
+
+# ---------------------------------------------------------------------------
+# REDUCE phase (KB-scoped)
+# ---------------------------------------------------------------------------
+#
+# Migrated from arkon's app/ai/mrp/reducer.py, steps 2.1-2.4.
+# KB reconciliation (arkon 2.5-2.6) and the planning LLM call (arkon 2.7) are
+# deferred to the PLAN phase — they belong with the planner, not the dedup.
+#
+# Scope difference from arkon: arkon REDUCE runs per source document. Here it
+# runs per knowledge base — one set of canonical entities/concepts for the
+# entire KB. Inputs come from ES (every artifact_map_extract row in this KB across
+# all docs); the result lives in ES under artifact_reduce_result.
+
+WIKI_REDUCE_COMPILE_KWD = "artifact_reduce_result"  # compile_kwd tag of the per-KB REDUCE result row
+DEFAULT_WIKI_REDUCE_MERGE_THRESHOLD = 0.95  # cosine ≥ this auto-merges
+DEFAULT_WIKI_REDUCE_AMBIGUOUS_LOW = 0.75  # cosine in [this, merge threshold) goes to the LLM
+DEFAULT_WIKI_REDUCE_AMBIGUOUS_BATCH = 50  # max pairs per LLM disambiguation call
+DEFAULT_WIKI_REDUCE_TIMEOUT = 60  # seconds per LLM disambiguation batch
+
+
+# System prompt for the LLM disambiguation batch. The shared engine
+# (``_common.bulk_dedup_items``) defaults to the same wording via
+# ``_common.DEFAULT_DISAMBIGUATE_SYSTEM``; we keep the local alias so the
+# constant name stays usable by call sites and external imports.
+WIKI_REDUCE_DISAMBIGUATE_SYSTEM = "You are a named-entity resolution assistant. Return only JSON."
+
+
+# --- ES I/O ----------------------------------------------------------------
+
+
+async def _wiki_load_all_map_extracts(tenant_id: str, kb_id: str) -> dict:
+    """Collect every MAP extract row of this KB into one merged extract dict.
+
+    Rows are read page by page; list sections are concatenated and topics
+    de-duplicated. A failed page read is logged and ends the scan early.
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    page_size = 1000
+    fields = ["id", "content_with_weight"]
+    where = {"compile_kwd": [WIKI_MAP_COMPILE_KWD]}
+    index = _rag_search.index_name(tenant_id)
+    combined = _wiki_empty_extract()
+    known_topics: set[str] = set()
+
+    def _absorb(raw_content) -> None:
+        # Fold one row's JSON payload into ``combined``; malformed rows are skipped.
+        if not isinstance(raw_content, str) or not raw_content:
+            return
+        try:
+            payload = json.loads(raw_content)
+        except Exception:
+            logging.debug("wiki_reduce: skipping unparseable extract row")
+            return
+        if not isinstance(payload, dict):
+            return
+        for section in _EXTRACT_LIST_KEYS:
+            entries = payload.get(section)
+            if isinstance(entries, list):
+                combined[section].extend(e for e in entries if isinstance(e, dict))
+        topics = payload.get("topics")
+        if not isinstance(topics, list):
+            return
+        for topic in topics:
+            if isinstance(topic, str) and topic and topic not in known_topics:
+                known_topics.add(topic)
+                combined["topics"].append(topic)
+
+    start = 0
+    while True:
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                fields,
+                [],
+                where,
+                [],
+                OrderByExpr(),
+                start,
+                page_size,
+                index,
+                [kb_id],
+            )
+            rows = settings.docStoreConn.get_fields(res, fields)
+        except Exception:
+            logging.exception("wiki_reduce: failed to page wiki_map_extract rows")
+            break
+        if not rows:
+            break
+        for row in rows.values():
+            _absorb(row.get("content_with_weight"))
+        if len(rows) < page_size:
+            break
+        start += page_size
+
+    return combined
+
+
+async def _wiki_compute_map_input_hash(tenant_id: str, kb_id: str) -> str:
+    """Fingerprint the KB's current ``artifact_map_extract`` rows (xxh64).
+
+    REDUCE / PLAN compare this value with the one stored next to their
+    cached output to decide whether MAP changed underneath them.
+
+    The digest covers the sorted ``(chunk_id, chunk_hash)`` pairs of every
+    row plus ``_WIKI_PIPELINE_REV``, therefore:
+      * a new chunk adds a pair → digest changes;
+      * an edited chunk is re-inserted with a new hash → digest changes;
+      * a deleted chunk loses its pair → digest changes;
+      * an untouched KB keeps the same digest.
+
+    Rows whose ``chunk_hash_kwd`` is missing or not a string contribute
+    ``''`` as their hash. Rows are read in windows of 128 and the pairs are
+    sorted once at the end, so the digest does not depend on page order.
+
+    Returns ``""`` when any page read fails.
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    page_size = 128
+    fields = ["id", "source_chunk_ids", "chunk_hash_kwd"]
+    where = {"compile_kwd": [WIKI_MAP_COMPILE_KWD]}
+    index = _rag_search.index_name(tenant_id)
+
+    collected: list[tuple[str, str]] = []
+    start = 0
+    while True:
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                fields,
+                [],
+                where,
+                [],
+                OrderByExpr(),
+                start,
+                page_size,
+                index,
+                [kb_id],
+            )
+            rows = settings.docStoreConn.get_fields(res, fields)
+        except Exception:
+            logging.exception(
+                "wiki: failed to compute MAP input hash for kb=%s (offset=%d)",
+                kb_id,
+                start,
+            )
+            # A partial scan cannot produce a trustworthy digest; the
+            # empty string makes REDUCE / PLAN fall through to a full
+            # re-run instead of cache-hitting on incomplete input.
+            return ""
+        if not rows:
+            break
+        # One (chunk_id, hash) pair per source chunk id listed on the row.
+        for row in rows.values():
+            stamp = row.get("chunk_hash_kwd")
+            stamp = stamp if isinstance(stamp, str) else ""
+            sources = row.get("source_chunk_ids") or []
+            if not isinstance(sources, list):
+                continue
+            collected.extend((cid, stamp) for cid in sources if isinstance(cid, str) and cid)
+        if len(rows) < page_size:
+            break
+        start += page_size
+
+    # Sort once so the digest is independent of the order rows came back in.
+    ordered = sorted(collected)
+    body = "|".join(f"{cid}:{stamp}" for cid, stamp in ordered) + "|" + _WIKI_PIPELINE_REV
+    return _xxhash.xxh64(body.encode("utf-8", "surrogatepass")).hexdigest()
+
+
+async def _wiki_load_reduce_resume(
+    tenant_id: str,
+    kb_id: str,
+) -> Optional[tuple[dict, str]]:
+    """Load the KB's cached REDUCE row as ``(cached_result, stored_input_hash)``.
+
+    Returns None when the row is missing, unreadable, or not a JSON object."""
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    where = {"compile_kwd": [WIKI_REDUCE_COMPILE_KWD]}
+    fields = ["id", "content_with_weight", "input_hash_kwd"]
+    try:
+        res = await thread_pool_exec(
+            settings.docStoreConn.search,
+            fields,
+            [],
+            where,
+            [],
+            OrderByExpr(),
+            0,
+            1,
+            index,
+            [kb_id],
+        )
+        rows = settings.docStoreConn.get_fields(res, fields)
+    except Exception:
+        logging.exception("wiki_reduce: failed to load resume cache")
+        return None
+    if not rows:
+        return None
+    first = next(iter(rows.values()))
+    blob = first.get("content_with_weight")
+    if not isinstance(blob, str) or not blob:
+        return None
+    try:
+        payload = json.loads(blob)
+    except Exception:
+        logging.debug("wiki_reduce: cached result unparseable; ignoring")
+        return None
+    if not isinstance(payload, dict):
+        return None
+    stamp = first.get("input_hash_kwd")
+    return payload, (stamp if isinstance(stamp, str) else "")  # non-string stamp → ""
+
+
+async def _wiki_persist_reduce(
+    reduced: dict,
+    tenant_id: str,
+    kb_id: str,
+    input_hash: str = "",
+) -> None:
+    """Replace this KB's single ``artifact_reduce_result`` row.
+
+    Any existing result row is deleted (best-effort) and a fresh one is
+    inserted under a stable per-KB id. ``input_hash`` is stored in
+    ``input_hash_kwd`` as the MAP-state fingerprint the reduction was
+    computed from. Failures are logged, never raised.
+    """
+    from common import settings
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    kb_key = str(kb_id)
+    serialized = json.dumps(reduced, ensure_ascii=False)
+    row = {
+        # Deterministic id per KB, so every run addresses the same row.
+        "id": _stable_row_id(WIKI_REDUCE_COMPILE_KWD, kb_key),
+        "doc_id": kb_key,  # sentinel — KB-scoped row, not a real document
+        "compile_kwd": WIKI_REDUCE_COMPILE_KWD,
+        "source_id": [kb_key],
+        "input_hash_kwd": input_hash,
+        "content_with_weight": serialized,
+        "available_int": 0,
+    }
+    try:
+        await thread_pool_exec(
+            settings.docStoreConn.delete,
+            {"compile_kwd": WIKI_REDUCE_COMPILE_KWD},
+            index,
+            kb_id,
+        )
+    except Exception:
+        logging.debug("wiki_reduce: prior result delete failed; will overwrite by id")
+    try:
+        await thread_pool_exec(settings.docStoreConn.insert, [row], index, kb_id)
+    except Exception:
+        logging.exception("wiki_reduce: failed to persist result row")
+
+
+# --- public entry ----------------------------------------------------------
+
+
+async def wiki_reduce_from_extracts(
+    chat_mdl,
+    embd_mdl,
+    tenant_id: str,
+    kb_id: str,
+    merge_threshold: float = DEFAULT_WIKI_REDUCE_MERGE_THRESHOLD,
+    ambiguous_low: float = DEFAULT_WIKI_REDUCE_AMBIGUOUS_LOW,
+    ambiguous_batch_size: int = DEFAULT_WIKI_REDUCE_AMBIGUOUS_BATCH,
+    llm_timeout: int = DEFAULT_WIKI_REDUCE_TIMEOUT,
+    force_rerun: bool = False,
+    callback: Optional[Callable] = None,
+) -> dict:
+    """Phase 2 (REDUCE/Dedup) — KB-scoped.
+
+    Loads every ``artifact_map_extract`` row in this KB (across all documents) and
+    produces a single canonical dict of entities/concepts via:
+        1. Exact dedup by ``(normalize(name), type)`` for entities and by
+           ``normalize(term)`` for concepts.
+        2. Embedding dedup of entity names: vectorized pairwise cosine over
+           ``embd_mdl.encode(...)`` output. Pairs of the same type with
+           similarity ≥ ``merge_threshold`` auto-merge; pairs in
+           ``[ambiguous_low, merge_threshold)`` go to step 3.
+        3. LLM disambiguation: batches of ambiguous pairs are sent to
+           ``chat_mdl`` via ``gen_json``; true verdicts collapse via union-find.
+        4. Apply merges: sum ``mention_count``, union ``aliases`` and
+           ``chunk_ids`` per canonical entity.
+
+    The result is persisted to ES as a single non-searchable
+    ``artifact_reduce_result`` row per KB, stamped with the MAP input hash it
+    was computed from. With ``force_rerun=False`` (default) the cached row is
+    returned only while that stamp still equals the current MAP input hash;
+    a missing or different stamp triggers a full re-reduce. Pass
+    ``force_rerun=True`` to skip the cache check entirely.
+
+    Args:
+        chat_mdl, embd_mdl: ragflow LLMBundle instances.
+        tenant_id, kb_id: address the doc-store index.
+        merge_threshold: cosine ≥ this auto-merges. Default 0.95.
+        ambiguous_low: cosine in [ambiguous_low, merge_threshold) goes to LLM.
+        ambiguous_batch_size: max pairs per LLM disambiguation call.
+        llm_timeout: seconds per LLM disambiguation batch.
+        force_rerun: bypass the cached artifact_reduce_result.
+        callback: optional ``(progress: float, msg: str)`` callback; errors
+            raised by it are logged at debug level and ignored.
+
+    Returns the canonical extract dict::
+
+        {
+          "entities":  [{"name","type","aliases","mention_count","chunk_ids"}, ...],
+          "concepts":  [{"term","definition_excerpt","mention_count","chunk_ids"}, ...],
+          "claims":    [...],   # pass-through from MAP
+          "relations": [...],   # pass-through from MAP
+          "topics":    [...],   # pass-through from MAP
+        }
+
+    When MAP yielded neither entities nor concepts, dedup is skipped and the
+    pass-through sections are still returned (and persisted).
+    """
+    def _notify(progress: float, msg: str) -> None:
+        # Progress reporting is best-effort: a failing callback is logged
+        # at debug level and never aborts the reduction.
+        if not callback:
+            return
+        try:
+            callback(progress, msg)
+        except Exception:
+            logging.debug("wiki_reduce: progress callback failed", exc_info=True)
+
+    # Incremental gate: the current MAP-state fingerprint is the union
+    # of every MAP row's (chunk_id, chunk_hash). If a cached REDUCE row
+    # exists AND its stored input_hash equals the current fingerprint,
+    # the upstream chunks haven't changed → cached output is still
+    # correct. ``force_rerun=True`` bypasses both checks for the
+    # legacy / admin "rebuild from scratch" path.
+    current_input_hash = await _wiki_compute_map_input_hash(tenant_id, kb_id)
+    if not force_rerun:
+        cached_pair = await _wiki_load_reduce_resume(tenant_id, kb_id)
+        if cached_pair is not None:
+            cached, stored_hash = cached_pair
+            if stored_hash and stored_hash == current_input_hash:
+                _notify(1.0, "wiki REDUCE: cache hit (input unchanged)")
+                return cached
+            # Cache present but stale (no hash, or hash mismatch). Fall
+            # through to a full re-reduce and write a fresh stamp.
+
+    _notify(0.05, "wiki REDUCE: loading MAP extracts")
+    raw = await _wiki_load_all_map_extracts(tenant_id, kb_id)
+    raw_entities = raw.get("entities") or []
+    raw_concepts = raw.get("concepts") or []
+    logging.info(
+        "wiki_reduce: kb=%s loaded raw entities=%d concepts=%d claims=%d relations=%d",
+        kb_id,
+        len(raw_entities),
+        len(raw_concepts),
+        len(raw.get("claims") or []),
+        len(raw.get("relations") or []),
+    )
+
+    if not raw_entities and not raw_concepts:
+        # No entities/concepts to dedup. Claims, relations and topics are
+        # pass-through, so keep them instead of dropping them; persist so
+        # the next run can short-circuit on the input hash.
+        passthrough = _wiki_empty_extract()
+        passthrough["claims"] = list(raw.get("claims") or [])
+        passthrough["relations"] = list(raw.get("relations") or [])
+        passthrough["topics"] = list(raw.get("topics") or [])
+        await _wiki_persist_reduce(passthrough, tenant_id, kb_id, input_hash=current_input_hash)
+        return passthrough
+
+    _notify(0.25, "wiki REDUCE: dedup (exact + embedding + LLM)")
+
+    # Entities: full three-phase dedup keyed by (normalized name, type).
+    canonical_entities = await _bulk_dedup_items(
+        raw_entities,
+        name_key="name",
+        type_key="type",
+        chat_mdl=chat_mdl,
+        embd_mdl=embd_mdl,
+        merge_threshold=merge_threshold,
+        ambiguous_low=ambiguous_low,
+        ambiguous_batch_size=ambiguous_batch_size,
+        disambiguate_system_prompt=WIKI_REDUCE_DISAMBIGUATE_SYSTEM,
+        llm_timeout=llm_timeout,
+    )
+
+    # Concepts: exact-dedup only (current behaviour); keep the longest
+    # definition_excerpt across the group via aggregate_extra.
+    def _concept_extras(group: list[dict]) -> dict:
+        best_def = max(
+            ((c.get("definition_excerpt") or "") for c in group if isinstance(c, dict)),
+            key=lambda s: len(s) if isinstance(s, str) else 0,
+            default="",
+        )
+        return {"definition_excerpt": best_def}
+
+    canonical_concepts = await _bulk_dedup_items(
+        raw_concepts,
+        name_key="term",
+        type_key=None,
+        aggregate_extra=_concept_extras,
+    )
+
+    logging.info(
+        "wiki_reduce: after dedup entities=%d concepts=%d",
+        len(canonical_entities),
+        len(canonical_concepts),
+    )
+
+    reduced = {
+        "entities": canonical_entities,
+        "concepts": canonical_concepts,
+        "claims": list(raw.get("claims") or []),
+        "relations": list(raw.get("relations") or []),
+        "topics": list(raw.get("topics") or []),
+    }
+
+    _notify(0.9, "wiki REDUCE: persisting result")
+    await _wiki_persist_reduce(reduced, tenant_id, kb_id, input_hash=current_input_hash)
+
+    logging.info(
+        "wiki_reduce: kb=%s done — entities=%d concepts=%d claims=%d relations=%d topics=%d",
+        kb_id,
+        len(reduced["entities"]),
+        len(reduced["concepts"]),
+        len(reduced["claims"]),
+        len(reduced["relations"]),
+        len(reduced["topics"]),
+    )
+
+    _notify(1.0, "wiki REDUCE: done")
+
+    return reduced
+
+
+# ---------------------------------------------------------------------------
+# PLAN phase (KB-scoped)
+# ---------------------------------------------------------------------------
+#
+# Migrated from arkon's app/ai/mrp/reducer.py, steps 2.5-2.7 + 2.8 persist.
+# Scope: per KB (one Compilation Plan covering the entire knowledge base),
+# matching the REDUCE phase above.
+#
+# Flow:
+#   1. Resume — return cached artifact_compilation_plan ES row when present.
+#   2. Load REDUCE output from artifact_reduce_result.
+#   3. KB reconciliation — batch-embed entity/concept query texts and run a
+#      per-item KNN against existing artifact_page rows in this KB. Classify
+#      UPDATE / MAYBE / CREATE by similarity. Batched LLM resolves MAYBE.
+#   4. Planning call — one gen_json call producing the Compilation Plan JSON.
+#   5. Attach raw items as side context for REFINE (no extra ES round-trips).
+#   6. Persist as a single non-searchable artifact_compilation_plan row per KB.
+#
+# Differences vs arkon: KB-scoped instead of per-source; no `source` pages
+# emitted (chunk_ids attribution is enough); plan status defaults to
+# "approved" so REFINE can consume immediately (review workflow deferred).
+
+WIKI_PLAN_COMPILE_KWD = "artifact_compilation_plan"  # compile_kwd tag of the per-KB plan row
+WIKI_PAGE_COMPILE_KWD = "artifact_page"  # compile_kwd tag of the existing page rows
+DEFAULT_WIKI_PLAN_UPDATE_THRESHOLD = 0.95  # NOTE(review): presumably similarity ≥ this → UPDATE; confirm
+DEFAULT_WIKI_PLAN_MAYBE_THRESHOLD = 0.60  # NOTE(review): presumably lower bound of the MAYBE band; confirm
+# ~10 min — the planning call emits one big JSON plan and reasoning
+# models can spend a long time thinking before emitting tokens.
+# Override via the ``llm_timeout`` arg to
+# ``wiki_plan_from_reduction``.
+DEFAULT_WIKI_PLAN_TIMEOUT = 600
+DEFAULT_WIKI_PLAN_RECONCILE_BATCH = 50  # NOTE(review): presumably items per reconcile LLM batch; confirm
+
+
+WIKI_PLAN_PLANNING_SYSTEM = (
+    "You are a knowledge compilation planner. Given extracted entities and their "
+    "relationship to an existing knowledge base, produce a compilation plan. "
+    "Return ONLY valid JSON. "
+    "Keep the user's original language (Chinese/English etc.) for generated data."
+)
+
+# System prompt for the reconciliation call; it asks for a JSON boolean array.
+WIKI_PLAN_RECONCILE_SYSTEM = "You are a knowledge base assistant. Return only a JSON boolean array. Keep the user's original language (Chinese/English etc.) for generated data."
+
+
+# User prompt for the planning call. It is filled with ``str.format`` using
+# the fields kb_name, kb_description, entities_summary, concepts_summary,
+# kb_reconciliation and target_page_count. Literal JSON braces are doubled
+# (``{{`` / ``}}``) so ``format`` emits them as single braces.
+WIKI_PLAN_USER_TEMPLATE = """\
+## Knowledge base context
+Name: {kb_name}
+Description: {kb_description}
+
+## Extracted entities (with mention counts)
+{entities_summary}
+
+## Extracted concepts (with mention counts)
+{concepts_summary}
+
+## KB reconciliation results
+{kb_reconciliation}
+
+Produce a JSON compilation plan:
+
+{{
+  "pages": [
+    {{
+      "action": "CREATE",
+      "slug": "concept/example-name",
+      "title": "Example Page Title",
+      "page_type": "entity | concept | topic",
+      "entity_names": ["entity or concept name covered by this page"],
+      "related_kb_pages": ["existing-slug-1"],
+      "priority": 1
+    }}
+  ],
+  "estimated_page_count": 5,
+  "compilation_notes": "any important notes for the compiler"
+}}
+
+Rules:
+- action must be "CREATE" or "UPDATE".
+- For UPDATE, slug MUST be an existing wiki page slug from the KB
+  reconciliation list above.
+- page_type is one of: entity | concept | topic. Do NOT use "source".
+
+# Slug format (CRITICAL — every slug must follow this shape exactly)
+- The slug is ``<page_type>/<short-descriptive-name>``. The separator
+  between the type and the name MUST be a forward slash ``/``. Do NOT use a
+  hyphen here.
+- The descriptive part is lowercase, English/Latin only (transliterate
+  non-English names), and uses hyphens to join multi-word names. Keep it
+  short — 1 to 4 words is ideal.
+- The descriptive part MUST be unique to that page's specific subject. Do
+  NOT prefix every slug with the same KB-wide topic word. If the KB is
+  about logistics, do NOT emit ``concept/logistics-channels``,
+  ``concept/logistics-warehousing``, ``concept/logistics-fleet`` — emit
+  ``concept/distribution-channels``, ``concept/warehousing``,
+  ``concept/fleet-management`` instead.
+- Do NOT append numeric suffixes (``-1``, ``-2``, ``-v2``) or random hex
+  tags to make slugs distinct. If two candidate slugs collide, rename one
+  to use a different descriptive word.
+
+Examples of GOOD slugs:
+  - ``entity/jane-doe``               (entity page about a person)
+  - ``entity/acme-corp``               (entity page about a company)
+  - ``concept/fire-safety``            (concept page about a topic)
+  - ``concept/expense-approval``       (concept page about a process)
+  - ``topic/water-treatment``          (topic page grouping related items)
+
+Examples of BAD slugs (do NOT produce):
+  - ``concept-fire-safety``            (missing the ``/`` between type and name)
+  - ``concept/logistics-channels-1``   (numeric suffix to distinguish pages)
+  - ``concept/logistics-channels-abc`` (random hex tag)
+  - ``logistics/concept-channels``     (type and topic order swapped)
+  - ``concept/example-name``           (just duplicate the sample)
+
+# Other rules
+- Group closely related small entities onto the same page (max 3-4 per page).
+  BUT if a primary entity is described through several distinct thematic
+  sections that appear as concepts above, prefer a separate ``concept`` page
+  for EACH such section instead of collapsing them onto the entity page.
+- priority 1 = highest importance (process first).
+- entity_names must match the names in the entities / concepts lists above.
+- Target approximately {target_page_count} total pages (feel free to deviate
+  by ±50% if the KB content warrants it).
+- Return ONLY the JSON object.
+"""
+
+
+# --- helpers ---------------------------------------------------------------
+
+
+def _wiki_target_page_count(total_items: int) -> int:
+    """Suggest a page budget: one page per three items, kept within 8..60."""
+    lowest, highest = 8, 60
+    if total_items <= 0:
+        return lowest
+    suggested = total_items // 3
+    if suggested < lowest:
+        return lowest
+    if suggested > highest:
+        return highest
+    return suggested
+
+
+def _wiki_format_entity_for_plan(entity: dict, reconciliation: dict) -> str:
+    """Render one canonical entity as a bullet line for the planning prompt.
+
+    Args:
+        entity: canonical entity dict; reads ``name``, ``type``,
+            ``mention_count`` and ``aliases`` (only the first three aliases
+            are shown).
+        reconciliation: ``{name: record}`` mapping; the record's ``action``
+            and ``page_slug`` become the trailing ``→ ACTION slug`` hint.
+            An entity without a record is shown as ``CREATE``.
+
+    Returns:
+        ``"  - name (type, N mentions[, aliases: a, b]) → ACTION[ slug]"``.
+    """
+    name = entity.get("name", "")
+    rec = reconciliation.get(name) or {}
+    action = rec.get("action") or "CREATE"
+    # CREATE records store ``page_slug=None``; ``.get(key, "")`` would return
+    # that None and the prompt would read "→ CREATE None". Coerce to "" so the
+    # rstrip() below leaves just the action.
+    slug = rec.get("page_slug") or ""
+    kb_info = f"→ {action} {slug}".rstrip()
+    aliases = ", ".join(str(alias) for alias in (entity.get("aliases") or [])[:3])
+    line = f"  - {name} ({entity.get('type', '')}, {entity.get('mention_count', 0)} mentions"
+    if aliases:
+        line += f", aliases: {aliases}"
+    return f"{line}) {kb_info}"
+
+
+def _wiki_format_concept_for_plan(concept: dict, reconciliation: dict) -> str:
+    """Render one canonical concept as a bullet line for the planning prompt.
+
+    Args:
+        concept: canonical concept dict; reads ``term`` and ``mention_count``.
+        reconciliation: ``{term: record}`` mapping; the record's ``action``
+            and ``page_slug`` become the trailing ``→ ACTION slug`` hint.
+            A concept without a record is shown as ``CREATE``.
+
+    Returns:
+        ``"  - term (N mentions) → ACTION[ slug]"``.
+    """
+    term = concept.get("term", "")
+    rec = reconciliation.get(term) or {}
+    action = rec.get("action") or "CREATE"
+    # CREATE records store ``page_slug=None``; coerce it to "" so the prompt
+    # does not end in the literal text "None".
+    slug = rec.get("page_slug") or ""
+    kb_info = f"→ {action} {slug}".rstrip()
+    return f"  - {term} ({concept.get('mention_count', 0)} mentions) {kb_info}"
+
+
+def _wiki_unmatched_record() -> dict:
+    """Return a fresh reconciliation record meaning "no existing page matched"."""
+    return {
+        "action": "CREATE",
+        "page_slug": None,
+        "page_title": None,
+        "page_id": None,
+        "similarity": 0.0,
+    }
+
+
+def _wiki_hit_similarity(row, fallback: float) -> float:
+    """Extract a similarity from a search-hit row, never raising.
+
+    The row is read as a mapping first (the caller also reads ``slug_kwd`` /
+    ``title_kwd`` through ``.get``), then as an object attribute. A missing,
+    non-numeric or non-positive ``_score`` falls back to the row's
+    ``similarity`` field and finally to ``fallback``.
+    """
+    raw = row.get("_score")
+    if raw is None:
+        raw = getattr(row, "_score", None)
+    try:
+        score = float(raw)
+    except (TypeError, ValueError):
+        score = 0.0
+    if score > 0.0:
+        return score
+    try:
+        return float(row.get("similarity", fallback))
+    except (TypeError, ValueError):
+        return float(fallback)
+
+
+async def _wiki_reconcile_with_kb(
+    canonical_entities: list[dict],
+    canonical_concepts: list[dict],
+    embd_mdl,
+    tenant_id: str,
+    kb_id: str,
+    update_threshold: float,
+    maybe_threshold: float,
+) -> dict[str, dict]:
+    """Match every canonical entity / concept to its nearest existing page.
+
+    Each item is embedded (entities by name; concepts as ``"term: definition"``
+    using the first 200 characters of ``definition_excerpt`` when present) and
+    a top-1 dense search is run against rows with ``compile_kwd=artifact_page``
+    in this KB.
+
+    Args:
+        canonical_entities: dicts keyed by ``name``; entries without a
+            non-empty string name are skipped.
+        canonical_concepts: dicts keyed by ``term``; same skipping rule.
+        embd_mdl: embedding model; ``encode(texts)`` runs on the thread pool
+            and its first return value is used as the vectors.
+        tenant_id: selects the doc-store index.
+        kb_id: knowledge base to search.
+        update_threshold: passed to the search as its ``similarity`` option
+            and used as the cut-off for ``UPDATE``.
+        maybe_threshold: similarity recorded for a hit that exposes no usable
+            score.
+
+    Returns:
+        ``{name_or_term: {"action", "page_slug", "page_title", "page_id",
+        "similarity"}}`` where ``action`` is ``UPDATE`` (hit at or above
+        ``update_threshold``), ``MAYBE`` (any other hit) or ``CREATE`` (no
+        hit). Embedding and per-item search failures are logged and degrade
+        to ``CREATE``.
+    """
+    items: list[tuple[str, str, dict]] = []  # (kind, key, source_dict)
+    for e in canonical_entities:
+        name = e.get("name")
+        if isinstance(name, str) and name:
+            items.append(("entity", name, e))
+    for c in canonical_concepts:
+        term = c.get("term")
+        if isinstance(term, str) and term:
+            items.append(("concept", term, c))
+
+    reconciliation: dict[str, dict] = {}
+    if not items:
+        return reconciliation
+
+    # Project imports are only needed once there is something to search for.
+    from common import settings
+    from common.doc_store.doc_store_base import MatchDenseExpr, OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    # Embed all query texts in one batch.
+    query_texts: list[str] = []
+    for kind, key, src in items:
+        text = key
+        if kind == "concept":
+            defn = src.get("definition_excerpt") or ""
+            if defn:
+                text = f"{key}: {defn[:200]}"
+        query_texts.append(text[:4000])
+
+    try:
+        embeddings, _ = await thread_pool_exec(embd_mdl.encode, query_texts)
+        vectors = list(embeddings)
+    except Exception:
+        logging.exception("wiki_plan: reconciliation embedding failed — all items will be CREATE")
+        return {key: _wiki_unmatched_record() for _, key, _ in items}
+
+    if len(vectors) != len(items):
+        # zip() below would silently mis-pair items and vectors.
+        logging.error(
+            "wiki_plan: reconciliation embedding count mismatch (%d vs %d); CREATE all",
+            len(vectors),
+            len(items),
+        )
+        return {key: _wiki_unmatched_record() for _, key, _ in items}
+
+    index = _rag_search.index_name(tenant_id)
+    condition = {"compile_kwd": [WIKI_PAGE_COMPILE_KWD]}
+
+    select_fields = ["id", "slug_kwd", "title_kwd", "page_type_kwd", "_score"]
+    for (_kind, key, _src), vec in zip(items, vectors):
+        vec_list = vec.tolist() if hasattr(vec, "tolist") else list(vec)
+        if not vec_list:
+            reconciliation[key] = _wiki_unmatched_record()
+            continue
+        # NOTE(review): the search floor is ``update_threshold``; on a backend
+        # that enforces it, hits in [maybe_threshold, update_threshold) are
+        # never returned. Confirm whether ``maybe_threshold`` was intended.
+        match_expr = MatchDenseExpr(
+            vector_column_name=f"q_{len(vec_list)}_vec",
+            embedding_data=vec_list,
+            embedding_data_type="float",
+            distance_type="cosine",
+            topn=1,
+            extra_options={"similarity": update_threshold},
+        )
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                select_fields,
+                [],
+                condition,
+                [match_expr],
+                OrderByExpr(),
+                0,
+                1,
+                index,
+                [kb_id],
+            )
+            field_map = settings.docStoreConn.get_fields(res, select_fields)
+        except Exception:
+            logging.exception("wiki_plan: KNN failed for %r", key)
+            reconciliation[key] = _wiki_unmatched_record()
+            continue
+
+        if not field_map:
+            reconciliation[key] = _wiki_unmatched_record()
+            continue
+
+        top_id, top_row = next(iter(field_map.items()))
+        # The row is a mapping, so the score must be read with ``.get``;
+        # attribute access never finds it and every hit would look like MAYBE.
+        sim = _wiki_hit_similarity(top_row, maybe_threshold)
+        reconciliation[key] = {
+            "action": "UPDATE" if sim >= update_threshold else "MAYBE",
+            "page_slug": top_row.get("slug_kwd"),
+            "page_title": top_row.get("title_kwd"),
+            "page_id": top_id,
+            "similarity": sim,
+        }
+
+    return reconciliation
+
+
+def _wiki_verdict_is_update(verdict) -> bool:
+    """Interpret one LLM verdict; only an affirmative value means UPDATE.
+
+    Strings are compared by content because ``"false"`` is truthy in Python;
+    every other value uses ordinary truthiness.
+    """
+    if isinstance(verdict, str):
+        return verdict.strip().lower() in ("true", "yes", "update")
+    return bool(verdict)
+
+
+async def _wiki_resolve_maybe_items(
+    reconciliation: dict[str, dict],
+    chat_mdl,
+    batch_size: int,
+    llm_timeout: int,
+) -> None:
+    """Flip MAYBE → UPDATE | CREATE via batched LLM calls. Mutates in place.
+
+    Args:
+        reconciliation: ``{name: record}``; only records whose ``action`` is
+            ``"MAYBE"`` are touched, and only their ``action`` is rewritten.
+        chat_mdl: chat model handed to ``gen_json``.
+        batch_size: maximum pairs per LLM call; values below 1 are treated
+            as 1.
+        llm_timeout: seconds allowed per LLM call.
+
+    Any batch that times out, fails, or returns something other than a list
+    (or a dict holding a list) is resolved to ``CREATE``; a missing verdict
+    for an individual pair is also ``CREATE``.
+    """
+    maybe_items = [(k, v) for k, v in reconciliation.items() if v.get("action") == "MAYBE"]
+    if not maybe_items:
+        return
+
+    def _default_to_create(pairs) -> None:
+        for pair_name, _ in pairs:
+            reconciliation[pair_name]["action"] = "CREATE"
+
+    # range() rejects a zero step and yields nothing for a negative one.
+    step = max(1, batch_size)
+    for batch_start in range(0, len(maybe_items), step):
+        batch = maybe_items[batch_start : batch_start + step]
+        lines = []
+        for k, (name, rec) in enumerate(batch):
+            title = rec.get("page_title") or rec.get("page_slug") or ""
+            slug = rec.get("page_slug") or ""
+            sim = rec.get("similarity", 0.0)
+            lines.append(f'{k + 1}. Entity: "{name}" — existing wiki page: "{title}" (slug: {slug}, similarity: {sim:.2f})')
+
+        user_prompt = (
+            "For each pair below, decide whether the entity refers to the same "
+            "real-world concept as the existing wiki page (true = UPDATE existing "
+            "page, false = CREATE new page).\n"
+            f"Return a JSON array of exactly {len(batch)} booleans. "
+            "Return ONLY the JSON array.\n\n" + "\n".join(lines)
+        )
+
+        try:
+            res = await asyncio.wait_for(
+                gen_json(WIKI_PLAN_RECONCILE_SYSTEM, user_prompt, chat_mdl, gen_conf={"temperature": 0.0}),
+                timeout=llm_timeout,
+            )
+        except asyncio.TimeoutError:
+            logging.warning("wiki_plan: MAYBE resolution timed out (%d pairs); defaulting CREATE", len(batch))
+            _default_to_create(batch)
+            continue
+        except Exception:
+            logging.exception("wiki_plan: MAYBE resolution failed (%d pairs); defaulting CREATE", len(batch))
+            _default_to_create(batch)
+            continue
+
+        # Accept a bare array, or an object wrapping one under any key.
+        decisions = None
+        if isinstance(res, list):
+            decisions = res
+        elif isinstance(res, dict):
+            decisions = next((v for v in res.values() if isinstance(v, list)), None)
+
+        if not isinstance(decisions, list):
+            logging.warning("wiki_plan: MAYBE LLM returned unexpected shape %r; CREATE all", type(res))
+            _default_to_create(batch)
+            continue
+
+        for k, (name, _) in enumerate(batch):
+            verdict = decisions[k] if k < len(decisions) else False
+            reconciliation[name]["action"] = "UPDATE" if _wiki_verdict_is_update(verdict) else "CREATE"
+
+
+async def _wiki_planning_call(
+    canonical_entities: list[dict],
+    canonical_concepts: list[dict],
+    reconciliation: dict[str, dict],
+    chat_mdl,
+    kb_name: str | None,
+    kb_description: str | None,
+    target_page_count: int,
+    llm_timeout: int,
+) -> dict:
+    """Ask the LLM once for the Compilation Plan JSON.
+
+    The prompt lists the 200 most-mentioned entities and the 200
+    most-mentioned concepts plus every reconciled UPDATE target. On timeout,
+    failure, or a non-object reply an empty plan is returned whose
+    ``compilation_notes`` names the reason; otherwise the reply is returned
+    with ``pages``, ``estimated_page_count`` and ``compilation_notes``
+    guaranteed to be present.
+    """
+
+    def _mentions(item: dict):
+        return item.get("mention_count", 0)
+
+    def _empty_plan(note: str) -> dict:
+        return {"pages": [], "estimated_page_count": 0, "compilation_notes": note}
+
+    # Most-mentioned first, capped so the prompt stays a reasonable size.
+    top_entities = sorted(canonical_entities, key=_mentions, reverse=True)[:200]
+    top_concepts = sorted(canonical_concepts, key=_mentions, reverse=True)[:200]
+
+    entity_lines = [_wiki_format_entity_for_plan(entity, reconciliation) for entity in top_entities]
+    concept_lines = [_wiki_format_concept_for_plan(concept, reconciliation) for concept in top_concepts]
+    update_lines = [
+        f"  - UPDATE: {name} → {rec['page_slug']} (sim={rec.get('similarity', 0.0):.2f})"
+        for name, rec in reconciliation.items()
+        if rec.get("action") == "UPDATE" and rec.get("page_slug")
+    ]
+
+    user_prompt = WIKI_PLAN_USER_TEMPLATE.format(
+        kb_name=kb_name or "(unspecified)",
+        kb_description=kb_description or "(no description)",
+        entities_summary="\n".join(entity_lines) or "  (none)",
+        concepts_summary="\n".join(concept_lines) or "  (none)",
+        kb_reconciliation="\n".join(update_lines) if update_lines else "  (all items are new)",
+        target_page_count=target_page_count,
+    )
+
+    try:
+        reply = await asyncio.wait_for(
+            gen_json(WIKI_PLAN_PLANNING_SYSTEM, user_prompt, chat_mdl, gen_conf={"temperature": 0.1}),
+            timeout=llm_timeout,
+        )
+    except asyncio.TimeoutError:
+        logging.warning("wiki_plan: planning LLM call timed out after %ds", llm_timeout)
+        return _empty_plan("planning timeout")
+    except Exception:
+        logging.exception("wiki_plan: planning LLM call failed")
+        return _empty_plan("planning failed")
+
+    if not isinstance(reply, dict):
+        return _empty_plan("planner returned non-object")
+
+    # Normalise the reply so callers can rely on the three core keys.
+    if not isinstance(reply.get("pages"), list):
+        reply["pages"] = []
+    reply.setdefault("estimated_page_count", len(reply["pages"]))
+    reply.setdefault("compilation_notes", "")
+    return reply
+
+
+# --- ES I/O ---------------------------------------------------------------
+
+
+async def _wiki_load_reduce_result(tenant_id: str, kb_id: str) -> Optional[dict]:
+    """Fetch and decode the REDUCE row stored for this KB.
+
+    Returns the JSON object held in the row's ``content_with_weight`` field,
+    or ``None`` when the search fails, no row exists, the field is empty or
+    not a string, or the payload does not decode to a JSON object.
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index_name = _rag_search.index_name(tenant_id)
+    row_filter = {"compile_kwd": [WIKI_REDUCE_COMPILE_KWD]}
+    wanted = ["id", "content_with_weight"]
+    try:
+        hits = await thread_pool_exec(
+            settings.docStoreConn.search,
+            wanted,
+            [],
+            row_filter,
+            [],
+            OrderByExpr(),
+            0,
+            1,
+            index_name,
+            [kb_id],
+        )
+        rows = settings.docStoreConn.get_fields(hits, wanted)
+    except Exception:
+        logging.exception("wiki_plan: failed to load wiki_reduce_result")
+        return None
+
+    if not rows:
+        return None
+    # At most one row was requested, so the first value is the REDUCE row.
+    payload = next(iter(rows.values())).get("content_with_weight")
+    if not (isinstance(payload, str) and payload):
+        return None
+    try:
+        decoded = json.loads(payload)
+    except Exception:
+        logging.debug("wiki_plan: wiki_reduce_result unparseable; ignoring")
+        return None
+    if isinstance(decoded, dict):
+        return decoded
+    return None
+
+
+async def _wiki_load_reduce_input_hash(tenant_id: str, kb_id: str) -> str:
+    """Return the input hash stored with this KB's REDUCE row, or ``""``.
+
+    Delegates to ``_wiki_load_reduce_resume`` and keeps only the second
+    element of the pair it returns. PLAN's incremental gate compares this
+    value with the hash stamped on the cached plan.
+    """
+    resume = await _wiki_load_reduce_resume(tenant_id, kb_id)
+    if resume is None:
+        return ""
+    _, reduce_hash = resume
+    return reduce_hash
+
+
+async def _wiki_load_plan_resume(
+    tenant_id: str,
+    kb_id: str,
+) -> Optional[tuple[dict, str]]:
+    """Load the cached plan row as ``(cached_plan, stored_input_hash)``.
+
+    ``stored_input_hash`` is the row's ``input_hash_kwd`` (``""`` when it is
+    missing or not a string); PLAN compares it with the current REDUCE input
+    hash to decide whether to re-plan. Returns ``None`` when the search
+    fails, no row exists, or the stored body is not a JSON object.
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index_name = _rag_search.index_name(tenant_id)
+    row_filter = {"compile_kwd": [WIKI_PLAN_COMPILE_KWD]}
+    wanted = ["id", "content_with_weight", "input_hash_kwd"]
+    try:
+        hits = await thread_pool_exec(
+            settings.docStoreConn.search,
+            wanted,
+            [],
+            row_filter,
+            [],
+            OrderByExpr(),
+            0,
+            1,
+            index_name,
+            [kb_id],
+        )
+        rows = settings.docStoreConn.get_fields(hits, wanted)
+    except Exception:
+        logging.exception("wiki_plan: failed to load cached plan")
+        return None
+
+    if not rows:
+        return None
+    plan_row = next(iter(rows.values()))
+    body = plan_row.get("content_with_weight")
+    if not (isinstance(body, str) and body):
+        return None
+    try:
+        plan = json.loads(body)
+    except Exception:
+        logging.debug("wiki_plan: cached plan unparseable; ignoring")
+        return None
+    if not isinstance(plan, dict):
+        return None
+
+    fingerprint = plan_row.get("input_hash_kwd")
+    return plan, fingerprint if isinstance(fingerprint, str) else ""
+
+
+async def _wiki_persist_plan(
+    plan: dict,
+    tenant_id: str,
+    kb_id: str,
+    input_hash: str = "",
+) -> None:
+    """Replace this KB's artifact_compilation_plan row with ``plan``.
+
+    The row is written with ``available_int=0`` and carries ``input_hash`` in
+    ``input_hash_kwd`` — the REDUCE fingerprint the plan was derived from,
+    which the next PLAN run compares before re-planning. Store errors are
+    logged, never raised.
+    """
+    from common import settings
+    from rag.nlp import search as _rag_search
+
+    index_name = _rag_search.index_name(tenant_id)
+    kb_key = str(kb_id)
+    body = json.dumps(plan, ensure_ascii=False)
+    row = {
+        "id": _stable_row_id(WIKI_PLAN_COMPILE_KWD, kb_key),
+        "doc_id": kb_key,  # sentinel — KB-scoped row, not a real document
+        "compile_kwd": WIKI_PLAN_COMPILE_KWD,
+        "source_id": [kb_key],
+        "input_hash_kwd": input_hash,
+        "content_with_weight": body,
+        "available_int": 0,
+    }
+
+    # Best-effort removal of the previous row; the insert below reuses the
+    # same stable id, so a failed delete is tolerated.
+    try:
+        await thread_pool_exec(
+            settings.docStoreConn.delete,
+            {"compile_kwd": WIKI_PLAN_COMPILE_KWD},
+            index_name,
+            kb_id,
+        )
+    except Exception:
+        logging.debug("wiki_plan: prior plan delete failed; relying on id-based upsert")
+
+    try:
+        await thread_pool_exec(settings.docStoreConn.insert, [row], index_name, kb_id)
+    except Exception:
+        logging.exception("wiki_plan: failed to persist plan row")
+
+
+# --- public entry ---------------------------------------------------------
+
+
+# ``compilation_notes`` values that ``_wiki_planning_call`` uses for an empty
+# plan produced by a timeout, an exception, or a non-object reply.
+_WIKI_PLAN_FAILURE_NOTES = ("planning timeout", "planning failed", "planner returned non-object")
+
+
+def _wiki_plan_notify(callback, progress: float, msg: str) -> None:
+    """Report progress through ``callback`` if given; a raising callback never aborts PLAN."""
+    if not callback:
+        return
+    try:
+        callback(progress, msg)
+    except Exception:
+        logging.debug("wiki_plan: progress callback raised; ignoring", exc_info=True)
+
+
+def _wiki_plan_is_failed(plan: dict) -> bool:
+    """True when ``plan`` is the empty placeholder returned for a failed planner call."""
+    return not plan.get("pages") and plan.get("compilation_notes") in _WIKI_PLAN_FAILURE_NOTES
+
+
+def _wiki_plan_attach_context(plan: dict, entities, concepts, claims, relations, topics, reconciliation) -> dict:
+    """Stamp ``plan`` with the approved status and the side-context fields for REFINE."""
+    plan["_status"] = "approved"
+    plan["_entities"] = entities
+    plan["_concepts"] = concepts
+    plan["_claims"] = claims
+    plan["_relations"] = relations
+    plan["_topics"] = topics
+    plan["_reconciliation"] = reconciliation
+    return plan
+
+
+async def wiki_plan_from_reduction(
+    chat_mdl,
+    embd_mdl,
+    tenant_id: str,
+    kb_id: str,
+    kb_name: Optional[str] = None,
+    kb_description: Optional[str] = None,
+    update_threshold: float = DEFAULT_WIKI_PLAN_UPDATE_THRESHOLD,
+    maybe_threshold: float = DEFAULT_WIKI_PLAN_MAYBE_THRESHOLD,
+    reconcile_batch_size: int = DEFAULT_WIKI_PLAN_RECONCILE_BATCH,
+    llm_timeout: int = DEFAULT_WIKI_PLAN_TIMEOUT,
+    force_rerun: bool = False,
+    callback: Optional[Callable] = None,
+) -> dict:
+    """Phase 3 (PLAN) — KB-scoped.
+
+    Loads the cached ``artifact_reduce_result`` for this KB, reconciles every
+    canonical entity/concept against existing ``artifact_page`` rows in the same
+    KB (top-1 KNN, with MAYBE matches resolved by a batched LLM call), then
+    asks the LLM for one Compilation Plan JSON. The plan is persisted under
+    ``compile_kwd="artifact_compilation_plan"`` with ``_status="approved"`` so
+    REFINE can consume it immediately.
+
+    A plan that is empty because the planner call timed out or failed is
+    persisted WITHOUT the REDUCE hash, so the next call re-plans instead of
+    serving the failure from cache.
+
+    Args:
+        chat_mdl, embd_mdl: ragflow LLMBundle instances.
+        tenant_id, kb_id: address the doc-store index.
+        kb_name / kb_description: optional KB-level metadata that biases the
+            planner's slug and tone choices.
+        update_threshold: cosine ≥ this → UPDATE the existing page outright.
+        maybe_threshold: cosine in [maybe_threshold, update_threshold) → ask LLM.
+        reconcile_batch_size: max pairs per LLM MAYBE-resolution call.
+        llm_timeout: seconds per LLM call (both MAYBE resolution and planning).
+        force_rerun: bypass the cached artifact_compilation_plan.
+        callback: optional ``(progress: float, msg: str)`` callback; exceptions
+            it raises are ignored.
+
+    Returns the plan dict with this shape (plus underscore-prefixed side
+    context fields for REFINE)::
+
+        {
+          "pages":               [{action, slug, title, page_type, entity_names, related_kb_pages, priority}, ...],
+          "estimated_page_count": int,
+          "compilation_notes":   str,
+          "_status":             "approved",
+          "_entities":           [...],   # canonical entities from REDUCE
+          "_concepts":           [...],
+          "_claims":             [...],
+          "_relations":          [...],
+          "_topics":             [...],
+          "_reconciliation":     {name: {action, page_slug, page_id, similarity}, ...},
+        }
+    """
+    # Incremental gate: PLAN keys off REDUCE's input_hash. If the cached
+    # plan was stamped with the same hash REDUCE is currently exposing,
+    # nothing upstream has changed and the plan is still valid.
+    current_reduce_hash = await _wiki_load_reduce_input_hash(tenant_id, kb_id)
+    if not force_rerun:
+        cached_pair = await _wiki_load_plan_resume(tenant_id, kb_id)
+        if cached_pair is not None:
+            cached, stored_hash = cached_pair
+            if stored_hash and stored_hash == current_reduce_hash:
+                _wiki_plan_notify(callback, 1.0, "wiki PLAN: cache hit (REDUCE unchanged)")
+                return cached
+
+    _wiki_plan_notify(callback, 0.05, "wiki PLAN: loading REDUCE result")
+
+    reduced = await _wiki_load_reduce_result(tenant_id, kb_id)
+    if reduced is None:
+        logging.warning("wiki_plan: no wiki_reduce_result found for kb=%s — returning empty plan", kb_id)
+        empty = _wiki_plan_attach_context(
+            {"pages": [], "estimated_page_count": 0, "compilation_notes": "no REDUCE result available"},
+            [],
+            [],
+            [],
+            [],
+            [],
+            {},
+        )
+        await _wiki_persist_plan(empty, tenant_id, kb_id, input_hash=current_reduce_hash)
+        return empty
+
+    canonical_entities = reduced.get("entities") or []
+    canonical_concepts = reduced.get("concepts") or []
+    raw_claims = reduced.get("claims") or []
+    raw_relations = reduced.get("relations") or []
+    raw_topics = reduced.get("topics") or []
+
+    total_items = len(canonical_entities) + len(canonical_concepts)
+    logging.info(
+        "wiki_plan: kb=%s reducing-input entities=%d concepts=%d (total=%d)",
+        kb_id,
+        len(canonical_entities),
+        len(canonical_concepts),
+        total_items,
+    )
+
+    if total_items == 0:
+        # Nothing to plan: skip both LLM stages and record an empty plan.
+        empty = _wiki_plan_attach_context(
+            {"pages": [], "estimated_page_count": 0, "compilation_notes": "no canonical items"},
+            canonical_entities,
+            canonical_concepts,
+            raw_claims,
+            raw_relations,
+            raw_topics,
+            {},
+        )
+        await _wiki_persist_plan(empty, tenant_id, kb_id, input_hash=current_reduce_hash)
+        return empty
+
+    _wiki_plan_notify(callback, 0.25, "wiki PLAN: KB reconciliation")
+
+    reconciliation = await _wiki_reconcile_with_kb(
+        canonical_entities=canonical_entities,
+        canonical_concepts=canonical_concepts,
+        embd_mdl=embd_mdl,
+        tenant_id=tenant_id,
+        kb_id=kb_id,
+        update_threshold=update_threshold,
+        maybe_threshold=maybe_threshold,
+    )
+
+    if callback:
+        n_maybe = sum(1 for v in reconciliation.values() if v.get("action") == "MAYBE")
+        _wiki_plan_notify(callback, 0.55, f"wiki PLAN: resolving {n_maybe} MAYBE items")
+
+    await _wiki_resolve_maybe_items(
+        reconciliation,
+        chat_mdl,
+        batch_size=reconcile_batch_size,
+        llm_timeout=llm_timeout,
+    )
+
+    _wiki_plan_notify(callback, 0.75, "wiki PLAN: planning LLM call")
+
+    target = _wiki_target_page_count(total_items)
+    plan = await _wiki_planning_call(
+        canonical_entities=canonical_entities,
+        canonical_concepts=canonical_concepts,
+        reconciliation=reconciliation,
+        chat_mdl=chat_mdl,
+        kb_name=kb_name,
+        kb_description=kb_description,
+        target_page_count=target,
+        llm_timeout=llm_timeout,
+    )
+
+    # Decide the stamp before decorating the plan: a planner failure must not
+    # be cached against the current REDUCE state, or every later run would
+    # return the empty plan until REDUCE itself changes.
+    plan_hash = current_reduce_hash
+    if _wiki_plan_is_failed(plan):
+        logging.warning(
+            "wiki_plan: kb=%s planner produced no plan (%s); persisting without REDUCE hash so the next run re-plans",
+            kb_id,
+            plan.get("compilation_notes"),
+        )
+        plan_hash = ""
+
+    _wiki_plan_attach_context(
+        plan,
+        canonical_entities,
+        canonical_concepts,
+        raw_claims,
+        raw_relations,
+        raw_topics,
+        reconciliation,
+    )
+
+    _wiki_plan_notify(callback, 0.9, "wiki PLAN: persisting plan")
+    await _wiki_persist_plan(plan, tenant_id, kb_id, input_hash=plan_hash)
+
+    logging.info(
+        "wiki_plan: kb=%s done — pages=%d (target=%d) updates=%d creates=%d",
+        kb_id,
+        len(plan.get("pages") or []),
+        target,
+        sum(1 for v in reconciliation.values() if v.get("action") == "UPDATE"),
+        sum(1 for v in reconciliation.values() if v.get("action") == "CREATE"),
+    )
+
+    _wiki_plan_notify(callback, 1.0, "wiki PLAN: done")
+
+    return plan
+
+
+# ---------------------------------------------------------------------------
+# REFINE phase (KB-scoped)
+# ---------------------------------------------------------------------------
+#
+# Migrated from D:/git/arkon/app/ai/mrp/writer.py (simple writer path) and
+# merger.py (merge_page_content).
+#
+# Scope: per KB. Consumes the artifact_compilation_plan row written by PLAN,
+# writes one artifact_page per planned page in parallel under a semaphore.
+# UPDATE actions LLM-merge new vs existing content with a 70 % shrink-check
+# fallback to the new content. Each written page is persisted to ES as a
+# searchable artifact_page row (with embedding) so PLAN reconciliation finds it
+# on the next REDUCE→PLAN cycle.
+#
+# Resume: per-slug artifact_page_draft rows act as a cache; a re-entry skips
+# slugs already cached unless force_rerun=True.
+#
+# Differences vs arkon: no full_text — source context is the union of the
+# evidence chunks fetched from ES by id. Image-marker handling and the
+# complex tool-using writer are deliberately deferred.
+
+# ``compile_kwd`` value of the per-slug draft rows that act as REFINE's resume cache.
+WIKI_DRAFT_COMPILE_KWD = "artifact_page_draft"
+# NOTE(review): presumably the size of the semaphore bounding parallel page
+# writes — confirm against the REFINE entry point.
+DEFAULT_WIKI_REFINE_WORKERS = 4
+# NOTE(review): presumably seconds allowed per page-writer LLM call — confirm.
+DEFAULT_WIKI_REFINE_TIMEOUT = 300
+# NOTE(review): presumably the character cap on source context per page — confirm.
+WIKI_REFINE_SOURCE_BUDGET_CHARS = 60_000
+# The 70 % shrink-check ratio for UPDATE merges: a merge that fails the check
+# falls back to the new content.
+WIKI_MERGE_BODY_SHRINK_THRESHOLD = 0.7
+# NOTE(review): presumably seconds allowed for the merge LLM call — confirm.
+WIKI_MERGE_TIMEOUT = 600
+
+
+# Default page-structure guidance. ``_build_refine_writer_system`` substitutes
+# it for ``{template_example}`` in the writer system prompt whenever no
+# override (or only whitespace) is supplied.
+WIKI_TEMPLATE_EXAMPLE = (
+    "Each page must be a proper encyclopedic article, NOT a flat bullet list:\n"
+    "1. Opening paragraph (2-4 sentences defining what this is). No heading.\n"
+    "2. Sections with H2 headings, each starting with prose before sub-bullets.\n"
+    "3. Bold key terms on first use; link them with [[ ]] wikilinks.\n"
+    "4. Examples or implications where the source provides them.\n"
+    "5. ## See also section at the end with wikilinks to highly related pages(less than 12).\n\n"
+    "Page structure could be as following:\n(Not provided)"
+)
+
+# Writer system prompt as a template: the ``{template_example}``
+# placeholder is filled in at request time so each artifact compilation
+# template can override the page-structure section without touching the
+# rest of the writer's guidance. Use ``_build_refine_writer_system`` to
+# materialize a concrete prompt; ``WIKI_REFINE_WRITER_SYSTEM`` is
+# kept as the default-filled value for back-compat with any code that
+# still imports it.
+#
+# The text is rendered with ``str.format``, so ``{template_example}`` must
+# stay the only brace pair; any literal brace added later has to be doubled.
+WIKI_REFINE_WRITER_SYSTEM_TEMPLATE = (
+    "You are an enterprise knowledge compilation writer. Your job is to write a single, "
+    "high-quality wiki page by reading the SOURCE TEXT provided and using the "
+    "evidence checklist as guidance for what to cover.\n\n"
+    "# Mindset: COMPILE, do NOT summarize\n"
+    "You are not writing an executive summary. You are extracting structured "
+    "knowledge and rewriting it into a reusable wiki page. The output should "
+    "contain MORE information density than a summary — organized differently, "
+    "but not condensed. A summary loses specifics. A wiki page preserves them "
+    "in a queryable structure.\n\n"
+    "# What to KEEP from the source (do not lose these)\n"
+    "- Specific numbers: thresholds, dosages, timeframes, dimensions, percentages.\n"
+    "- Named regulations, laws, articles, code references.\n"
+    "- Equipment names, model numbers, product specs.\n"
+    "- Procedure steps in order, with actual actions.\n"
+    "- Worked examples and exceptions.\n"
+    "- Named parties, roles, contact paths, escalation chains.\n"
+    "- Definitions verbatim or near-verbatim if the source is authoritative.\n"
+    "- Cause-effect statements ('X causes Y because Z') — preserve all three parts.\n\n"
+    "# What to DROP\n"
+    "- Marketing language, mission statements, ceremonial filler.\n"
+    "- Source-specific framing: 'This document explains…', 'In Section 3 below…'.\n"
+    "- Repeated boilerplate, tables of contents, cover-page metadata.\n"
+    "- Prose that just rephrases what was already said.\n\n"
+    "# Language\n"
+    "Write in the SAME LANGUAGE as the source text. Never translate content.\n\n"
+    "# Page structure — CRITICAL\n"
+    "{template_example}\n\n"
+    "# What NOT to do\n"
+    "- Do NOT dump raw bullet points from the source as the entire content.\n"
+    "- Do NOT omit the opening prose paragraph.\n"
+    "- Do NOT include Citations / Footnotes sections.\n"
+    "- Do NOT use [^N] footnote markers.\n"
+    "- Do NOT translate the content language.\n\n"
+    "# Wikilinks\n"
+    "- Use [[slug]] or [[slug|display text]] to cross-link.\n"
+    "- CRITICAL: You may ONLY link to slugs from the 'Available pages' list.\n"
+    "  Do NOT invent or hallucinate slugs.\n\n"
+    "# Minimum depth\n"
+    "- concept/topic pages: at least 200 words of actual prose+structure.\n"
+    "- entity pages: at least 100 words.\n"
+)
+
+
+def _build_refine_writer_system(example: str | None) -> str:
+    """Build the REFINE writer system prompt around a page-structure example.
+
+    ``example`` is the per-template override for the page-structure
+    section of the prompt. ``None``, an empty string, or a whitespace-only
+    string all select ``WIKI_TEMPLATE_EXAMPLE`` instead. The chosen text
+    (stripped of surrounding whitespace when it is an override) replaces
+    the ``{template_example}`` placeholder of
+    ``WIKI_REFINE_WRITER_SYSTEM_TEMPLATE``.
+    """
+    override = example.strip() if example else ""
+    page_structure = override if override else WIKI_TEMPLATE_EXAMPLE
+    return WIKI_REFINE_WRITER_SYSTEM_TEMPLATE.format(template_example=page_structure)
+
+
+# Default-filled writer prompt, for callers that have no override to apply.
+WIKI_REFINE_WRITER_SYSTEM = _build_refine_writer_system(None)
+
+
+# User-message template for the REFINE page writer. It is filled with
+# ``str.format`` and expects these fields: ``action``, ``slug``, ``title``,
+# ``page_type``, ``all_plan_slugs``, ``existing_section``,
+# ``source_context``, ``evidence_count`` and ``evidence_blocks``.
+WIKI_REFINE_WRITER_USER_TEMPLATE = """\
+## Task
+{action} the following wiki page.
+
+## Page specification
+- Slug: {slug}
+- Title: {title}
+- Type: {page_type}
+
+## Available pages (ONLY use these slugs for [[wikilinks]])
+{all_plan_slugs}
+
+{existing_section}
+
+## Source document text
+Read this carefully. Extract all relevant facts for this page's topic.
+
+{source_context}
+
+## Evidence checklist ({evidence_count} items)
+The following items were pre-extracted and should be covered in the page.
+Use them as a checklist — make sure you don't miss any of these facts.
+But also look for additional relevant information in the source text above.
+
+{evidence_blocks}
+
+## Instructions
+Write the complete wiki page in markdown based on the source text above.
+Cross-link to other pages using [[slug]] or [[slug|display text]] — ONLY
+use slugs from the "Available pages" list. Do NOT invent new slugs.
+Do NOT include Citations or Footnotes sections.
+MUST be in the language as the same as the source document text is.
+
+Return ONLY the markdown content, no other text.
+"""
+
+
+# System prompt for the merge step: the model is given an EXISTING and an
+# INCOMING version of one wiki page and must return a single merged
+# markdown page. The string has no format placeholders and is used as-is.
+WIKI_REFINE_MERGE_SYSTEM = (
+    "You are a wiki page merger. You receive two versions of the same wiki page:\n"
+    "- EXISTING: the current version in the knowledge base.\n"
+    "- INCOMING: a new version generated from a different source document.\n\n"
+    "Your job is to produce a SINGLE unified page that preserves ALL factual "
+    "content from BOTH versions. Rules:\n\n"
+    "1. KEEP all facts, numbers, procedures, names from both versions.\n"
+    "2. REMOVE exact duplicates — if both versions state the same fact, keep it once.\n"
+    "3. ORGANIZE coherently — clear H2 sections, opening paragraph, ## See also.\n"
+    "4. PRESERVE [[wikilinks]] from both versions.\n"
+    "5. Write in the SAME LANGUAGE as the existing content.\n"
+    "6. Do NOT summarize or condense — the merged page should be AT LEAST as long "
+    "as the longer of the two inputs.\n"
+    "7. Do NOT add any facts not present in either version.\n\n"
+    "Return ONLY the merged markdown content, no other text."
+)
+
+
+# --- helpers ---------------------------------------------------------------
+
+
+# Greedy, DOTALL: spans from the start of the text through the LAST
+# ``</think>`` tag, newlines included.
+_REFINE_THINK_PREFIX_RE = re.compile(r"^.*</think>", re.DOTALL)
+
+
+def _wiki_strip_think(raw: str) -> str:
+    """Remove a leading reasoning block that ends with ``</think>``.
+
+    Everything up to and including the last ``</think>`` is dropped and
+    the remainder is returned with surrounding whitespace stripped. Text
+    without the tag is only stripped. Non-string input yields ``""``.
+    """
+    if not isinstance(raw, str):
+        return ""
+    prefix = _REFINE_THINK_PREFIX_RE.match(raw)
+    remainder = raw if prefix is None else raw[prefix.end():]
+    return remainder.strip()
+
+
+def _wiki_assemble_evidence(
+    plan_item: dict,
+    claims: list[dict],
+    entity_by_name: dict[str, dict] | None = None,
+    concept_by_term: dict[str, dict] | None = None,
+) -> list[dict]:
+    """Collect the claims that support one planned page.
+
+    A claim is selected when its ``subject`` matches one of the plan
+    item's ``entity_names``, case-insensitively: either the whole stripped
+    subject equals a name, or a name occurs inside the subject delimited
+    by regex word boundaries. Each returned evidence item carries
+    ``statement``, ``subject``, ``confidence`` (default ``"explicit"``)
+    and ``chunk_ids`` (non-empty strings only) for downstream
+    source-context loading.
+
+    Claims are not schema-checked upstream, so malformed entries are
+    tolerated rather than fatal: a claim that is not a dict, or whose
+    ``subject`` is not a string, is skipped, and a ``chunk_ids`` value that
+    is not a list/tuple is treated as empty. ``claims`` may be ``None``.
+
+    Fallback: if no claim matches, a single evidence stub is synthesized
+    from the canonical entity/concept records so that provenance
+    (chunk_ids) and the source-context fetch still resolve to the chunks
+    that produced the entity/concept itself. Pass ``entity_by_name`` /
+    ``concept_by_term`` (lowercased-key lookups) to enable the fallback;
+    entities are consulted before concepts for each name.
+
+    Returns:
+        The matching evidence items, or a one-element list holding the
+        ``_synthetic`` stub, or ``[]`` when nothing can be attributed.
+    """
+    raw_names = [n.strip() for n in (plan_item.get("entity_names") or []) if isinstance(n, str) and n.strip()]
+    if not raw_names:
+        return []
+
+    names_lower = [n.lower() for n in raw_names]
+    exact_names = set(names_lower)
+    patterns = [re.compile(rf"\b{re.escape(n)}\b", re.IGNORECASE) for n in raw_names]
+
+    evidence: list[dict] = []
+    for claim in claims or []:
+        if not isinstance(claim, dict):
+            continue
+        subject = claim.get("subject")
+        # A list/number subject cannot be matched against names; skipping it
+        # keeps one malformed claim from aborting the whole page.
+        if not isinstance(subject, str):
+            continue
+        subj_raw = subject.strip()
+        if not subj_raw:
+            continue
+
+        if subj_raw.lower() not in exact_names and not any(p.search(subj_raw) for p in patterns):
+            continue
+
+        chunk_ids = claim.get("chunk_ids")
+        if not isinstance(chunk_ids, (list, tuple)):
+            # e.g. a bare string would otherwise be split into characters.
+            chunk_ids = []
+        evidence.append(
+            {
+                "statement": claim.get("statement", ""),
+                "subject": subject,
+                "confidence": claim.get("confidence", "explicit"),
+                "chunk_ids": [c for c in chunk_ids if isinstance(c, str) and c],
+            }
+        )
+
+    if evidence:
+        return evidence
+
+    # ---- Fallback: derive evidence from entity/concept chunk_ids. -------
+    if not entity_by_name and not concept_by_term:
+        return []
+
+    fallback_chunk_ids: list[str] = []
+    seen_chunk_ids: set[str] = set()
+    matched_names: list[str] = []
+    for name, name_lc in zip(raw_names, names_lower):
+        hit = entity_by_name.get(name_lc) if entity_by_name else None
+        if hit is None and concept_by_term:
+            hit = concept_by_term.get(name_lc)
+        if not hit or not isinstance(hit, dict):
+            continue
+        for cid in hit.get("chunk_ids") or []:
+            if isinstance(cid, str) and cid and cid not in seen_chunk_ids:
+                seen_chunk_ids.add(cid)
+                fallback_chunk_ids.append(cid)
+        matched_names.append(name)
+
+    if not fallback_chunk_ids:
+        return []
+
+    # Marker ``_synthetic`` keeps this item out of the writer prompt — it
+    # exists only to carry chunk_ids forward for provenance and source-context
+    # fetching. ``matched_names`` is non-empty here because chunk ids are
+    # only collected from a matched record.
+    return [
+        {
+            "statement": "",
+            "subject": matched_names[0],
+            "confidence": "inferred",
+            "chunk_ids": fallback_chunk_ids,
+            "_synthetic": True,
+        }
+    ]
+
+
+def _wiki_format_evidence_blocks(evidence: list[dict]) -> str:
+    """Render the evidence checklist shown to the page writer.
+
+    Items flagged ``_synthetic`` (chunk-id carriers produced by the
+    entity/concept fallback) are not real claims and are left out. Each
+    remaining item becomes a numbered entry with its upper-cased
+    confidence (``EXPLICIT`` when missing), subject and statement; entries
+    are separated by a blank line. With nothing to list, a fixed
+    placeholder sentence is returned instead.
+    """
+
+    def _entry(position: int, item: dict) -> str:
+        level = (item.get("confidence") or "explicit").upper()
+        who = item.get("subject") or ""
+        what = item.get("statement") or ""
+        return f"{position}. [{level}] {who}\n   {what}"
+
+    checklist = [item for item in (evidence or []) if not item.get("_synthetic")]
+    if checklist:
+        return "\n\n".join(_entry(pos, item) for pos, item in enumerate(checklist, 1))
+    return "(no pre-extracted evidence — extract facts directly from the source document text above)"
+
+
+def _wiki_collect_evidence_chunk_ids(evidence: list[dict]) -> list[str]:
+    """Return every chunk id referenced by ``evidence``, deduplicated.
+
+    Ids keep the order in which they are first encountered; entries that
+    are not non-empty strings are ignored.
+    """
+    # A dict doubles as an insertion-ordered set.
+    ordered: dict[str, None] = {}
+    for item in evidence:
+        for chunk_id in item.get("chunk_ids") or []:
+            if isinstance(chunk_id, str) and chunk_id:
+                ordered.setdefault(chunk_id, None)
+    return list(ordered)
+
+
+async def _wiki_load_chunks_by_id(
+    chunk_ids: list[str],
+    tenant_id: str,
+    kb_id: str,
+) -> dict[str, str]:
+    """Fetch chunks from ES by id. Returns ``{chunk_id: content_with_weight}``.
+
+    Tries a batch search first (``condition={"id": [chunk_ids]}``) and falls
+    back to ``docStoreConn.get(chunk_id, …)`` per missing id. The fallback
+    exists because the ``id`` filter is translated differently by each
+    backend (ES uses a bool/should over the ``id`` keyword and the reserved
+    ``_id``; Infinity translates to SQL ``id IN (...)``; OpenSearch routes
+    to the ``ids`` query). A chunk that was inserted without populating the
+    queryable ``id`` field — or whose dataset/table doesn't expose it the
+    same way — silently drops out of the batch query but is still
+    addressable via the primary-key ``get()``. The fallback heals both.
+
+    Ids whose content is missing, empty or not a string are absent from the
+    returned mapping; lookup failures are logged, never raised.
+    """
+    if not chunk_ids:
+        return {}
+    # Resolved at call time rather than at module import.
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    select_fields = ["id", "content_with_weight"]
+    out: dict[str, str] = {}
+    # dict.fromkeys dedupes while keeping first-seen order; entries that are
+    # not non-empty strings are dropped.
+    unique_ids = [cid for cid in dict.fromkeys(chunk_ids) if isinstance(cid, str) and cid]
+    if not unique_ids:
+        return {}
+
+    # Query the store in slices of at most BATCH ids.
+    BATCH = 500
+    for i in range(0, len(unique_ids), BATCH):
+        batch_ids = unique_ids[i : i + BATCH]
+        condition = {"id": batch_ids}
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                select_fields,
+                [],
+                condition,
+                [],
+                OrderByExpr(),
+                0,
+                len(batch_ids),
+                index,
+                [kb_id],
+            )
+            field_map = settings.docStoreConn.get_fields(res, select_fields)
+        except Exception:
+            # A failed batch counts as "nothing found": its ids stay missing
+            # and are retried one by one below.
+            logging.exception("wiki_refine: batch chunk fetch failed (%d ids)", len(batch_ids))
+            field_map = {}
+        for cid, row in field_map.items():
+            content = row.get("content_with_weight")
+            if isinstance(content, str) and content:
+                out[cid] = content
+
+    # Per-id fallback for anything the batch missed — robust against backend-
+    # specific quirks in how ``condition={"id": [list]}`` is translated.
+    missing = [cid for cid in unique_ids if cid not in out]
+    if missing:
+        logging.warning(
+            "wiki_refine: batch chunk fetch missed %d/%d id(s) in kb=%s; falling back to per-id get() (first missing: %s)",
+            len(missing),
+            len(unique_ids),
+            kb_id,
+            missing[0],
+        )
+
+        def _get_one(cid: str):
+            # Never raises: a failed get is logged and reported as (cid, None)
+            # so one bad id cannot fail the whole gather below.
+            try:
+                return cid, settings.docStoreConn.get(cid, index, [kb_id])
+            except Exception:
+                logging.exception("wiki_refine: per-id get failed for %s", cid)
+                return cid, None
+
+        # Run the per-id gets concurrently to keep latency reasonable.
+        results = await asyncio.gather(*[thread_pool_exec(_get_one, cid) for cid in missing], return_exceptions=False)
+
+        recovered = 0
+        for cid, doc in results:
+            if not isinstance(doc, dict):
+                continue
+            content = doc.get("content_with_weight")
+            if isinstance(content, str) and content:
+                out[cid] = content
+                recovered += 1
+
+        if recovered:
+            logging.info(
+                "wiki_refine: per-id fallback recovered %d/%d missing chunk(s)",
+                recovered,
+                len(missing),
+            )
+
+    # Anything still absent after both passes is reported once, then skipped.
+    final_missing = [cid for cid in unique_ids if cid not in out]
+    if final_missing:
+        logging.warning(
+            "wiki_refine: %d chunk(s) still unresolved after fallback in kb=%s (first: %s) — check that the chunk_ids exist in the doc-store and that the row's kb_id matches the request.",
+            len(final_missing),
+            kb_id,
+            final_missing[0],
+        )
+
+    return out
+
+
+async def _wiki_build_source_context(
+    evidence: list[dict],
+    tenant_id: str,
+    kb_id: str,
+    budget: int = WIKI_REFINE_SOURCE_BUDGET_CHARS,
+) -> str:
+    """Concatenate evidence chunks into a labelled source-context block.
+
+    Chunk ids are taken from ``evidence`` in first-seen order and their
+    text is loaded from the doc store. Every chunk is rendered as a
+    ``[CHUNK <id>]`` header line followed by its content, and the rendered
+    chunks are joined with blank lines.
+
+    Budget is char-based. A chunk that does not fit is cut down to the
+    remaining budget (and tagged with a truncation marker) when more than
+    1000 characters are still available, otherwise it is left out; in both
+    cases it is counted in the trailing "omitted" marker. Later, smaller
+    chunks may still be included.
+
+    Args:
+        evidence: evidence items carrying ``chunk_ids``.
+        tenant_id, kb_id: address the doc-store index.
+        budget: maximum number of characters of chunk text to emit.
+
+    Returns:
+        The assembled context, or a parenthesised placeholder sentence when
+        the evidence references no chunks or none of them could be loaded.
+    """
+    chunk_ids = _wiki_collect_evidence_chunk_ids(evidence)
+    if not chunk_ids:
+        return "(no source chunks available)"
+
+    chunk_map = await _wiki_load_chunks_by_id(chunk_ids, tenant_id, kb_id)
+    if not chunk_map:
+        # Report through the logger (not stdout) so the failure is captured
+        # with the rest of the wiki_refine diagnostics.
+        logging.warning(
+            "wiki_refine: none of %d source chunk(s) could be loaded for tenant=%s kb=%s (first chunk_id=%s)",
+            len(chunk_ids),
+            tenant_id,
+            kb_id,
+            chunk_ids[0],
+        )
+        return "(source chunks could not be loaded)"
+
+    parts: list[str] = []
+    total = 0
+    truncated = 0
+    for cid in chunk_ids:
+        content = chunk_map.get(cid)
+        if not content:
+            continue
+        block = f"[CHUNK {cid}]\n{content}"
+        # The extra 2 accounts for the blank-line separator between blocks.
+        if total + len(block) + 2 > budget:
+            remaining = budget - total
+            if remaining > 1000:
+                parts.append(block[:remaining] + "\n\n[…chunk truncated…]")
+                total += remaining
+            truncated += 1
+            continue
+        parts.append(block)
+        total += len(block) + 2
+
+    if truncated:
+        parts.append(f"\n\n[…{truncated} chunk(s) omitted to fit context budget…]")
+
+    return "\n\n".join(parts)
+
+
+# --- artifactlink rewriting and doc-id collection ------------------------------
+
+_WIKILINK_PIPE_RE = re.compile(r"\[\[([^\[\]\|]+?)\|([^\[\]]+?)\]\]")
+_WIKILINK_SIMPLE_RE = re.compile(r"\[\[([^\[\]\|]+?)\]\]")
+# Union of the two forms above: group 1 is the slug, group 2 the optional
+# display text (``None`` for a plain ``[[slug]]``). Matching both forms in
+# one pass lets outlinks be recorded in true document order.
+_WIKILINK_ANY_RE = re.compile(r"\[\[([^\[\]\|]+?)(?:\|([^\[\]]+?))?\]\]")
+
+
+def _wiki_transform_links(content_md: str, kb_id: str) -> tuple[str, list[str]]:
+    """Rewrite ``[[slug]]`` / ``[[slug|display]]`` wikilinks to standard
+    markdown links whose href encodes ``(kb_id, slug)`` so a renderer can
+    fetch the target page from ES.
+
+    Returns ``(rewritten_md, unique_outlinks)`` — outlinks are slug strings
+    in first-seen order across both link forms. The href format is
+    ``artifact/{kb_id}/{slug}`` which is relative; clients are expected to
+    map this to whatever route serves the page (e.g.
+    ``/api/v1/artifact/{kb_id}/{slug}``).
+
+    Slug and display text are whitespace-stripped; a plain ``[[slug]]``
+    uses the slug as link text. ``None`` / empty input yields ``("", [])``.
+    """
+    kb_id_str = str(kb_id)
+    # A dict doubles as an insertion-ordered set of slugs.
+    outlinks: dict[str, None] = {}
+
+    def _rewrite(m: re.Match) -> str:
+        slug = m.group(1).strip()
+        display = m.group(2)
+        if slug:
+            outlinks.setdefault(slug, None)
+        text = slug if display is None else display.strip()
+        return f"[{text}](artifact/{kb_id_str}/{slug})"
+
+    rewritten = _WIKILINK_ANY_RE.sub(_rewrite, content_md or "")
+    return rewritten, list(outlinks)
+
+
+async def _wiki_collect_doc_ids(
+    chunk_ids: list[str],
+    tenant_id: str,
+    kb_id: str,
+) -> list[str]:
+    """Resolve the parent ``doc_id`` of each chunk and return the distinct
+    values in first-seen order.
+
+    Chunks are looked up in batches of 500 ids. A row's ``doc_id`` may be a
+    plain string or a list/tuple of strings; anything else is ignored. A
+    batch whose lookup raises is logged and skipped. When no doc id at all
+    is resolved, a warning with the request size and the number of rows
+    found is logged so the empty result can be diagnosed.
+    """
+    if not chunk_ids:
+        return []
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    select_fields = ["id", "doc_id"]
+    # Insertion-ordered set of resolved doc ids.
+    resolved: dict[str, None] = {}
+    rows_found = 0
+
+    def _candidates(value):
+        """Normalize a ``doc_id`` field to an iterable of candidate ids."""
+        if isinstance(value, str):
+            return (value,)
+        if isinstance(value, (list, tuple)):
+            return value
+        return ()
+
+    BATCH = 500
+    for start in range(0, len(chunk_ids), BATCH):
+        window = chunk_ids[start : start + BATCH]
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                select_fields,
+                [],
+                {"id": window},
+                [],
+                OrderByExpr(),
+                0,
+                len(window),
+                index,
+                [kb_id],
+            )
+            field_map = settings.docStoreConn.get_fields(res, select_fields)
+        except Exception:
+            logging.exception("wiki_refine: failed to fetch doc_ids for %d chunks", len(window))
+            continue
+        rows_found += len(field_map)
+        for row in field_map.values():
+            for doc_id in _candidates(row.get("doc_id")):
+                if isinstance(doc_id, str) and doc_id:
+                    resolved.setdefault(doc_id, None)
+
+    # ``chunk_ids`` is known to be non-empty at this point.
+    if not resolved:
+        logging.warning(
+            "wiki_refine: doc_id resolution returned 0 for %d chunk(s) (rows_found=%d, kb=%s); first chunk_id=%s",
+            len(chunk_ids),
+            rows_found,
+            kb_id,
+            chunk_ids[0],
+        )
+    return list(resolved)
+
+
+async def _wiki_get_existing_page(
+    slug: str,
+    tenant_id: str,
+    kb_id: str,
+) -> Optional[dict]:
+    """Look up the stored wiki page with this slug in the KB.
+
+    Returns ``None`` when the lookup fails (the error is logged) or when no
+    row matches. Otherwise returns a dict with ``id``, ``content_md``,
+    ``content_md_raw``, ``title`` and ``page_type`` built from the first
+    row returned. Both ``content_md`` and ``content_md_raw`` carry the
+    row's ``content_with_weight``; ``page_type`` defaults to ``"concept"``
+    and ``title`` to ``""`` when the stored field is empty.
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    wanted = ["id", "content_with_weight", "title_kwd", "page_type_kwd"]
+    where = {"compile_kwd": [WIKI_PAGE_COMPILE_KWD], "slug_kwd": [slug]}
+    index = _rag_search.index_name(tenant_id)
+    try:
+        # offset 0, limit 1: only the first matching row is needed.
+        res = await thread_pool_exec(
+            settings.docStoreConn.search,
+            wanted,
+            [],
+            where,
+            [],
+            OrderByExpr(),
+            0,
+            1,
+            index,
+            [kb_id],
+        )
+        rows = settings.docStoreConn.get_fields(res, wanted)
+    except Exception:
+        logging.exception("wiki_refine: failed to fetch existing page for slug=%s", slug)
+        return None
+    if not rows:
+        return None
+
+    page_id, fields = next(iter(rows.items()))
+    body = fields.get("content_with_weight") or ""
+    page = {"id": page_id, "content_md": body, "content_md_raw": body}
+    page["title"] = fields.get("title_kwd") or ""
+    page["page_type"] = fields.get("page_type_kwd") or "concept"
+    return page
+
+
+async def _wiki_chat_text(
+    chat_mdl,
+    system_prompt: str,
+    user_prompt: str,
+    temperature: float,
+    llm_timeout: int,
+) -> str:
+    """Run one chat completion and return its text.
+
+    The system/user pair is first passed through ``message_fit_in`` with
+    ``chat_mdl.max_length``; if that raises, the untrimmed messages are
+    sent. The call is bounded by ``llm_timeout`` seconds. A timeout or any
+    other failure is logged and yields ``""``. A tuple result contributes
+    its first element, and the text is passed through
+    ``_wiki_strip_think`` before being returned.
+    """
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
+    try:
+        _, messages = message_fit_in(messages, chat_mdl.max_length)
+    except Exception:
+        logging.exception("wiki_refine: message_fit_in failed; sending untrimmed")
+
+    gen_conf = {"temperature": temperature}
+    try:
+        # First message is the system prompt; the rest form the history.
+        pending = chat_mdl.async_chat(messages[0]["content"], messages[1:], gen_conf)
+        reply = await asyncio.wait_for(pending, timeout=llm_timeout)
+    except asyncio.TimeoutError:
+        logging.warning("wiki_refine: chat call timed out after %ds", llm_timeout)
+        return ""
+    except Exception:
+        logging.exception("wiki_refine: chat call failed")
+        return ""
+
+    text = reply[0] if isinstance(reply, tuple) else reply
+    return _wiki_strip_think(text or "")
+
+
+async def _wiki_write_page_simple(
+    plan_item: dict,
+    evidence: list[dict],
+    existing_md: Optional[str],
+    source_context: str,
+    all_plan_slugs: list[str],
+    chat_mdl,
+    llm_timeout: int,
+    example: Optional[str] = None,
+) -> str:
+    """Single LLM call → markdown content.
+
+    Args:
+        plan_item: planned page; ``slug``, ``title``, ``page_type`` and
+            ``action`` are read (defaults: title = slug, page_type =
+            ``"concept"``, action = ``"CREATE"``).
+        evidence: evidence items for the checklist section.
+        existing_md: current page body; when truthy it is embedded in an
+            "Existing page content" section of the prompt.
+        source_context: pre-built source text block.
+        all_plan_slugs: every planned slug; the page's own slug is removed
+            before the list is offered as link targets.
+        chat_mdl: chat model handed to ``_wiki_chat_text``.
+        llm_timeout: seconds allowed for the LLM call.
+        example: the per-template ``parser_config.example`` override for
+            the writer's page-structure section. Falsy / whitespace-only
+            values fall through to ``WIKI_TEMPLATE_EXAMPLE``.
+
+    Returns:
+        The writer's markdown, or ``""`` when the chat call fails.
+    """
+    own_slug = plan_item.get("slug") or ""
+    available = [s for s in all_plan_slugs if s and s != own_slug]
+    slugs_block = "\n".join(f"- [[{s}]]" for s in available) if available else "(none — this is the only page)"
+
+    if existing_md:
+        existing_section = f"## Existing page content (UPDATE — integrate new evidence into this)\n\n{existing_md}\n"
+    else:
+        existing_section = ""
+
+    # Count only what the checklist actually lists: ``_synthetic`` stubs are
+    # dropped by ``_wiki_format_evidence_blocks``, so including them would
+    # announce items that never appear in the prompt.
+    listed_evidence = [ev for ev in (evidence or []) if not ev.get("_synthetic")]
+
+    user_prompt = WIKI_REFINE_WRITER_USER_TEMPLATE.format(
+        action=plan_item.get("action", "CREATE"),
+        slug=own_slug,
+        title=plan_item.get("title", own_slug),
+        page_type=plan_item.get("page_type", "concept"),
+        all_plan_slugs=slugs_block,
+        existing_section=existing_section,
+        source_context=source_context,
+        evidence_count=len(listed_evidence),
+        evidence_blocks=_wiki_format_evidence_blocks(evidence),
+    )
+
+    return await _wiki_chat_text(
+        chat_mdl,
+        _build_refine_writer_system(example),
+        user_prompt,
+        temperature=0.15,
+        llm_timeout=llm_timeout,
+    )
+
+
+async def _wiki_merge_page_content(
+    existing_md: str,
+    new_md: str,
+    slug: str,
+    chat_mdl,
+    shrink_threshold: float = WIKI_MERGE_BODY_SHRINK_THRESHOLD,
+    llm_timeout: int = WIKI_MERGE_TIMEOUT,
+) -> str:
+    """Merge the stored page with a freshly written one through the LLM.
+
+    No LLM call is made when the existing body is missing or shorter than
+    50 stripped characters, or when both bodies are equal after stripping
+    (``new_md`` is returned), or when ``new_md`` is empty (``existing_md``
+    is returned). Otherwise the merged text is accepted only if it is at
+    least ``shrink_threshold`` times the length of the longer input; an
+    empty LLM result or a too-short merge falls back to ``new_md``.
+    """
+    if not existing_md:
+        return new_md
+    existing_core = existing_md.strip()
+    if len(existing_core) < 50 or existing_core == (new_md or "").strip():
+        return new_md
+    if not new_md:
+        return existing_md
+
+    prompt_parts = [
+        f"Merge these two versions of wiki page `{slug}`:\n\n",
+        f"## EXISTING VERSION\n\n{existing_md}\n\n",
+        "---\n\n",
+        f"## INCOMING VERSION\n\n{new_md}\n\n",
+        "---\n\n",
+        "Produce the merged page now. Return ONLY the markdown content.",
+    ]
+    merged = await _wiki_chat_text(
+        chat_mdl,
+        WIKI_REFINE_MERGE_SYSTEM,
+        "".join(prompt_parts),
+        temperature=0.1,
+        llm_timeout=llm_timeout,
+    )
+    if not merged:
+        return new_md
+
+    # Guard against the model condensing the page instead of merging it.
+    longest_input = max(len(existing_md), len(new_md))
+    floor = int(longest_input * shrink_threshold)
+    if len(merged) >= floor:
+        return merged
+
+    logging.warning(
+        "wiki_refine: merge rejected for slug=%s (merged=%d chars < %d threshold; max input=%d). Falling back to new content.",
+        slug,
+        len(merged),
+        floor,
+        longest_input,
+    )
+    return new_md
+
+
+def _wiki_extract_summary(content_md: str, max_chars: int = 300) -> str:
+    """Return the first body paragraph of a markdown page as one line.
+
+    Blank lines and lines starting with ``#`` are skipped until the first
+    body line; from there, stripped lines are joined with single spaces
+    until a blank or ``#`` line is reached or the joined text is at least
+    ``max_chars`` long. The result is cut to ``max_chars`` characters.
+    Non-string or blank input yields ``""``.
+    """
+    if not isinstance(content_md, str) or not content_md.strip():
+        return ""
+
+    pieces: list[str] = []
+    joined_len = 0  # length of " ".join(pieces)
+    for raw_line in content_md.splitlines():
+        text = raw_line.strip()
+        if text and not text.startswith("#"):
+            joined_len += len(text) + (1 if pieces else 0)
+            pieces.append(text)
+            if joined_len >= max_chars:
+                break
+        elif pieces:
+            # The paragraph ends at the first blank line or heading.
+            break
+    return " ".join(pieces)[:max_chars]
+
+
+def _wiki_draft_row_id(kb_id: str, slug: str) -> str:
+    """Return the row id of the draft for ``slug`` in ``kb_id``.
+
+    Delegates to ``_stable_row_id`` with ``WIKI_DRAFT_COMPILE_KWD``, the
+    KB id and the slug.
+    """
+    return _stable_row_id(WIKI_DRAFT_COMPILE_KWD, kb_id, slug)
+
+
+async def _wiki_persist_draft(
+    page: dict,
+    tenant_id: str,
+    kb_id: str,
+    plan_input_hash: str = "",
+) -> None:
+    """Store one page as a non-searchable draft row (resume cache).
+
+    The whole ``page`` dict is JSON-encoded into ``content_with_weight``.
+    Any earlier draft for the same slug is deleted first; a failure of
+    that delete is only logged at debug level and the insert still runs.
+    A failing insert is logged and swallowed. Pages without a ``slug`` are
+    ignored.
+
+    ``plan_input_hash`` is the PLAN's ``input_hash_kwd`` at the time the
+    draft was produced; it is stored on the row so a later REFINE run can
+    compare it with the current PLAN hash and rewrite stale drafts.
+    """
+    from common import settings
+    from rag.nlp import search as _rag_search
+
+    slug = page.get("slug") or ""
+    if not slug:
+        return
+
+    index = _rag_search.index_name(tenant_id)
+    kb_ref = str(kb_id)
+    draft_row = {
+        "id": _wiki_draft_row_id(kb_id, slug),
+        "doc_id": kb_ref,
+        "compile_kwd": WIKI_DRAFT_COMPILE_KWD,
+        "artifact_slug_kwd": slug,
+        "source_id": [kb_ref],
+        "input_hash_kwd": plan_input_hash,
+        "content_with_weight": json.dumps(page, ensure_ascii=False),
+        "available_int": 0,  # non-searchable
+    }
+    stale_filter = {"compile_kwd": WIKI_DRAFT_COMPILE_KWD, "artifact_slug_kwd": slug}
+
+    # Step 1: best-effort removal of the previous draft for this slug.
+    try:
+        await thread_pool_exec(settings.docStoreConn.delete, stale_filter, index, kb_id)
+    except Exception:
+        logging.debug("wiki_refine: prior draft delete failed; relying on id upsert")
+
+    # Step 2: write the new draft row.
+    try:
+        await thread_pool_exec(settings.docStoreConn.insert, [draft_row], index, kb_id)
+    except Exception:
+        logging.exception("wiki_refine: failed to persist draft slug=%s", slug)
+
+
+async def _wiki_load_refine_resume(
+    tenant_id: str,
+    kb_id: str,
+) -> dict[str, tuple[dict, str]]:
+    """Read every cached draft row of this KB.
+
+    Returns ``{slug: (page, stored_plan_input_hash)}``. Rows are fetched in
+    pages of 500 until a short or empty page is returned; a failing fetch
+    is logged and ends the scan with whatever was collected so far. Rows
+    whose slug or payload is not a string, or whose payload is not a JSON
+    object, are skipped. A row without a string ``input_hash_kwd`` reports
+    ``""`` as its hash, which callers treat as always-stale.
+    """
+    from common import settings
+    from common.doc_store.doc_store_base import OrderByExpr
+    from rag.nlp import search as _rag_search
+
+    index = _rag_search.index_name(tenant_id)
+    condition = {"compile_kwd": [WIKI_DRAFT_COMPILE_KWD]}
+    select_fields = ["id", "artifact_slug_kwd", "content_with_weight", "input_hash_kwd"]
+
+    def _decode(row: dict):
+        """Turn one stored row into ``(slug, page, hash)`` or ``None``."""
+        slug = row.get("artifact_slug_kwd")
+        payload = row.get("content_with_weight")
+        if not (isinstance(slug, str) and isinstance(payload, str)):
+            return None
+        try:
+            page = json.loads(payload)
+        except Exception:
+            return None
+        if not isinstance(page, dict):
+            return None
+        plan_hash = row.get("input_hash_kwd")
+        return slug, page, (plan_hash if isinstance(plan_hash, str) else "")
+
+    PAGE_SIZE = 500
+    drafts: dict[str, tuple[dict, str]] = {}
+    offset = 0
+    while True:
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                select_fields,
+                [],
+                condition,
+                [],
+                OrderByExpr(),
+                offset,
+                PAGE_SIZE,
+                index,
+                [kb_id],
+            )
+            field_map = settings.docStoreConn.get_fields(res, select_fields)
+        except Exception:
+            logging.exception("wiki_refine: failed to page draft cache")
+            break
+        if not field_map:
+            break
+        for row in field_map.values():
+            decoded = _decode(row)
+            if decoded is not None:
+                slug, page, plan_hash = decoded
+                drafts[slug] = (page, plan_hash)
+        # A short page means the result set is exhausted.
+        if len(field_map) < PAGE_SIZE:
+            break
+        offset += PAGE_SIZE
+    return drafts
+
+
+# --- public entry ---------------------------------------------------------
+
+
+async def wiki_refine_from_plan(
+    chat_mdl,
+    embd_mdl,
+    tenant_id: str,
+    kb_id: str,
+    max_workers: int = DEFAULT_WIKI_REFINE_WORKERS,
+    llm_timeout: int = DEFAULT_WIKI_REFINE_TIMEOUT,
+    source_budget_chars: int = WIKI_REFINE_SOURCE_BUDGET_CHARS,
+    merge_shrink_threshold: float = WIKI_MERGE_BODY_SHRINK_THRESHOLD,
+    force_rerun: bool = False,
+    callback: Optional[Callable] = None,
+    example: Optional[str] = None,
+) -> list[dict]:
+    """Phase 4 (REFINE) — KB-scoped.
+
+    Reads the cached ``wiki_compilation_plan`` for this KB and writes one
+    wiki page per planned entry. Writers run in parallel under
+    ``asyncio.Semaphore(max_workers)``. UPDATE pages are LLM-merged against
+    their existing content (sanity-checked at ``merge_shrink_threshold``).
+    Each finished page is persisted as a searchable ``wiki_page`` row in ES,
+    plus a non-searchable ``wiki_page_draft`` row for resume.
+
+    Args:
+        chat_mdl, embd_mdl: ragflow LLMBundle instances.
+        tenant_id, kb_id: address the doc-store index.
+        max_workers: max concurrent writers (default 4).
+        llm_timeout: seconds per writer LLM call (default 300).
+        source_budget_chars: max chars of source-chunk context per writer call.
+        merge_shrink_threshold: a merged body shorter than this fraction of
+            the longest input falls back to the new content.
+        force_rerun: ignore the wiki_page_draft cache and re-write everything.
+        callback: optional ``(progress: float, msg: str)`` callback.
+
+    Returns the list of page dicts (one per planned entry). Each page dict
+    has ``slug, title, page_type, action, content_md, summary,
+    entity_names, related_kb_pages, source_chunk_ids``.
+    """
+    # Defensive: some callers accidentally pass the result of
+    # ``LLMBundle.encode()`` (a tuple) instead of the bundle itself. Earlier
+    # phases often hit a resume cache so this surfaces here for the first
+    # time. ``_ensure_llm_bundle`` unwraps the tuple if possible, otherwise
+    # logs and returns ``None`` so we bail cleanly.
+    embd_mdl = _ensure_llm_bundle(embd_mdl, "encode", label="wiki_refine: embd_mdl")
+    if embd_mdl is None:
+        return []
+    chat_mdl = _ensure_llm_bundle(chat_mdl, "async_chat", label="wiki_refine: chat_mdl")
+    if chat_mdl is None:
+        return []
+
+    if callback:
+        try:
+            callback(0.02, "wiki REFINE: loading plan")
+        except Exception:
+            pass
+
+    plan_pair = await _wiki_load_plan_resume(tenant_id, kb_id)
+    if plan_pair is None:
+        logging.warning("wiki_refine: no wiki_compilation_plan found for kb=%s", kb_id)
+        return []
+    plan, plan_input_hash = plan_pair
+    if not isinstance(plan, dict):
+        logging.warning("wiki_refine: cached plan is not a dict for kb=%s", kb_id)
+        return []
+
+    pages_spec = plan.get("pages") or []
+    if not pages_spec:
+        logging.info("wiki_refine: plan has no pages for kb=%s", kb_id)
+        return []
+    # Sort by priority then dedupe by slug, keeping the first (highest-priority)
+    # entry. The planning LLM sometimes emits the same slug multiple times,
+    # which both wastes writer calls and bloats every prompt's "Available
+    # pages" list with duplicates.
+    sorted_spec = sorted(
+        [p for p in pages_spec if isinstance(p, dict) and p.get("slug")],
+        key=lambda p: float(p.get("priority", 99)),
+    )
+    seen_slugs: set[str] = set()
+    pages_spec = []
+    duplicates_dropped = 0
+    for p in sorted_spec:
+        s = p.get("slug")
+        if not s:
+            continue
+        if s in seen_slugs:
+            duplicates_dropped += 1
+            continue
+        seen_slugs.add(s)
+        pages_spec.append(p)
+    if duplicates_dropped:
+        logging.info(
+            "wiki_refine: dropped %d duplicate slug entr(ies) from plan for kb=%s",
+            duplicates_dropped,
+            kb_id,
+        )
+
+    all_claims = plan.get("_claims") or []
+    # ``all_plan_slugs`` is implicitly deduped now (pages_spec is unique).
+    all_plan_slugs = [p["slug"] for p in pages_spec]
+
+    # Build canonical entity/concept lookups for evidence fallback. When MAP
+    # produced no claims (a real failure mode we've seen on Chinese / dense
+    # technical content), provenance still resolves via the chunk_ids on
+    # the entities and concepts themselves. The lookups index every name
+    # variant (canonical + aliases) so the planner LLM picking an alias
+    # spelling still hits the right canonical record.
+    entity_by_name: dict[str, dict] = {}
+    for e in plan.get("_entities") or []:
+        if not isinstance(e, dict):
+            continue
+        canon = (e.get("name") or "").strip()
+        if canon:
+            entity_by_name.setdefault(canon.lower(), e)
+        for alias in e.get("aliases") or []:
+            if isinstance(alias, str) and alias.strip():
+                entity_by_name.setdefault(alias.strip().lower(), e)
+
+    concept_by_term: dict[str, dict] = {}
+    for c in plan.get("_concepts") or []:
+        if not isinstance(c, dict):
+            continue
+        term = (c.get("term") or "").strip()
+        if term:
+            concept_by_term.setdefault(term.lower(), c)
+        # Concepts in REDUCE output rarely carry aliases, but accept them if
+        # present so a future MAP schema change is forward-compatible.
+        for alias in c.get("aliases") or []:
+            if isinstance(alias, str) and alias.strip():
+                concept_by_term.setdefault(alias.strip().lower(), c)
+
+    # Resume cache — only honour drafts whose stored PLAN input_hash
+    # matches the current plan's. Mismatch (or missing on legacy rows)
+    # forces that slug to be rewritten. ``force_rerun`` still nukes
+    # everything for the admin "rebuild from scratch" path.
+    cached: dict[str, dict] = {}
+    stale_drafts = 0
+    if not force_rerun:
+        all_drafts = await _wiki_load_refine_resume(tenant_id, kb_id)
+        for slug, (page, stored_hash) in all_drafts.items():
+            if plan_input_hash and stored_hash and stored_hash == plan_input_hash:
+                cached[slug] = page
+            else:
+                stale_drafts += 1
+        if cached or stale_drafts:
+            logging.info(
+                "wiki_refine: resume — %d fresh, %d stale draft(s) for kb=%s",
+                len(cached),
+                stale_drafts,
+                kb_id,
+            )
+
+    pending = [p for p in pages_spec if p.get("slug") not in cached]
+    total = max(1, len(pending))
+
+    if callback:
+        try:
+            callback(0.1, f"wiki REFINE: writing {len(pending)} page(s) (cached={len(cached)})")
+        except Exception:
+            pass
+
+    semaphore = asyncio.Semaphore(max_workers) if max_workers and max_workers > 0 else None
+    completed = 0
+    completed_lock = asyncio.Lock()
+
+    async def _write_one(plan_item: dict) -> Optional[dict]:
+        nonlocal completed
+        slug = plan_item.get("slug") or ""
+        action = (plan_item.get("action") or "CREATE").upper()
+        title = plan_item.get("title") or slug
+        page_type = plan_item.get("page_type") or "concept"
+
+        async def _run() -> Optional[dict]:
+            nonlocal completed
+            try:
+                evidence = _wiki_assemble_evidence(
+                    plan_item,
+                    all_claims,
+                    entity_by_name=entity_by_name,
+                    concept_by_term=concept_by_term,
+                )
+                source_chunk_ids = _wiki_collect_evidence_chunk_ids(evidence)
+                source_context = await _wiki_build_source_context(
+                    evidence,
+                    tenant_id,
+                    kb_id,
+                    budget=source_budget_chars,
+                )
+
+                # Use the raw [[slug]] form for the writer and merger so the
+                # LLM sees a stable, well-known artifactlink notation; we render
+                # to clickable links once at persist time.
+                existing_md_raw: Optional[str] = None
+                if action == "UPDATE":
+                    existing = await _wiki_get_existing_page(slug, tenant_id, kb_id)
+                    if existing:
+                        existing_md_raw = existing.get("content_md_raw") or existing.get("content_md")
+
+                content_md_raw = await _wiki_write_page_simple(
+                    plan_item,
+                    evidence,
+                    existing_md_raw,
+                    source_context,
+                    all_plan_slugs,
+                    chat_mdl,
+                    llm_timeout,
+                    example=example,
+                )
+                if not content_md_raw:
+                    content_md_raw = f"# {title}\n\n(Page generation produced no content.)"
+
+                if existing_md_raw:
+                    content_md_raw = await _wiki_merge_page_content(
+                        existing_md_raw,
+                        content_md_raw,
+                        slug,
+                        chat_mdl,
+                        shrink_threshold=merge_shrink_threshold,
+                    )
+
+                # Render artifactlinks once, here, after all LLM transforms.
+                content_md_rendered, outlinks = _wiki_transform_links(content_md_raw, kb_id)
+                source_doc_ids = await _wiki_collect_doc_ids(source_chunk_ids, tenant_id, kb_id)
+                summary = _wiki_extract_summary(content_md_rendered) or title
+
+                page = {
+                    "slug": slug,
+                    "title": title,
+                    "page_type": page_type,
+                    "action": action,
+                    # Rendered content (with clickable artifact/{kb_id}/{slug} links) is
+                    # what callers and the UI consume; the raw [[slug]] form is
+                    # preserved for LLM-facing re-reads and the merger.
+                    "content_md": content_md_rendered,
+                    "content_md_rendered": content_md_rendered,
+                    "content_md_raw": content_md_raw,
+                    "outlinks": outlinks,
+                    "summary": summary,
+                    "entity_names": plan_item.get("entity_names") or [],
+                    "related_kb_pages": plan_item.get("related_kb_pages") or [],
+                    "source_chunk_ids": source_chunk_ids,
+                    "source_doc_ids": source_doc_ids,
+                    "kb_id": str(kb_id),
+                }
+            except Exception:
+                logging.exception("wiki_refine: writer failed for slug=%s", slug)
+                return None
+
+            # Searchable artifact_page persistence has moved to the task
+            # handler (TaskHandler._persist_wiki_pages_to_es) so the ES
+            # schema can be controlled in one place at the ingest layer.
+            # REFINE now just builds the page dict and resume cache.
+            try:
+                await _wiki_persist_draft(
+                    page,
+                    tenant_id,
+                    kb_id,
+                    plan_input_hash=plan_input_hash,
+                )
+            except Exception:
+                logging.exception("wiki_refine: persist_draft failed for slug=%s", slug)
+
+            if callback:
+                async with completed_lock:
+                    completed += 1
+                    done = completed
+                progress = 0.1 + 0.85 * (done / total)
+                try:
+                    callback(progress, f"wiki REFINE: {done}/{total} pages written ({slug})")
+                except Exception:
+                    pass
+            return page
+
+        if semaphore is not None:
+            async with semaphore:
+                return await _run()
+        return await _run()
+
+    tasks = [asyncio.create_task(_write_one(p)) for p in pending]
+    if tasks:
+        try:
+            new_pages = await asyncio.gather(*tasks, return_exceptions=False)
+        except Exception:
+            for t in tasks:
+                t.cancel()
+            await asyncio.gather(*tasks, return_exceptions=True)
+            raise
+    else:
+        new_pages = []
+
+    results: list[dict] = []
+    # Emit pages in plan order: use the cached draft when one exists, otherwise the freshly written page.
+    for p in pages_spec:
+        slug = p.get("slug")
+        if not slug:
+            continue
+        if slug in cached:
+            results.append(cached[slug])
+        else:
+            # Look up the freshly produced page (None on writer failure).
+            for np in new_pages:
+                if np and np.get("slug") == slug:
+                    results.append(np)
+                    break
+
+    logging.info(
+        "wiki_refine: kb=%s done — pages written=%d (cached=%d new=%d)",
+        kb_id,
+        len(results),
+        len(cached),
+        sum(1 for p in new_pages if p),
+    )
+
+    if callback:
+        try:
+            callback(1.0, "wiki REFINE: done")
+        except Exception:
+            pass
+
+    return results
+
+
+__all__ = [  # names exported by ``import *``: the ``*_COMPILE_KWD`` constants and the ``wiki_*`` stage functions
+    "WIKI_MAP_COMPILE_KWD",
+    "WIKI_REDUCE_COMPILE_KWD",
+    "WIKI_PLAN_COMPILE_KWD",
+    "WIKI_PAGE_COMPILE_KWD",
+    "WIKI_DRAFT_COMPILE_KWD",
+    "wiki_map_from_chunks",
+    "wiki_reduce_from_extracts",
+    "wiki_plan_from_reduction",
+    "wiki_refine_from_plan",
+]
diff --git a/rag/flow/extractor/extractor.py b/rag/flow/extractor/extractor.py
index 07de5d2f6e..1e216e6800 100644
--- a/rag/flow/extractor/extractor.py
+++ b/rag/flow/extractor/extractor.py
@@ -17,9 +17,16 @@ import logging
 import random
 from copy import deepcopy
 
+from api.db.services.document_service import DocumentService
+from api.db.services.llm_service import LLMBundle
+from common.constants import LLMType
 import xxhash
 
 from agent.component.llm import LLMParam, LLM
+from rag.advanced_rag.knowlege_compile.structure import (
+    compile_structure_from_text,
+    merge_compiled_structures,
+)
 from rag.flow.base import ProcessBase, ProcessParamBase
 from rag.prompts.generator import run_toc_from_text
 
@@ -28,6 +35,7 @@ class ExtractorParam(ProcessParamBase, LLMParam):
     def __init__(self):
         super().__init__()
         self.field_name = ""
+        self.knowledge_compilation = {}
 
     def check(self):
         super().check()
@@ -38,22 +46,25 @@ class Extractor(ProcessBase, LLM):
     component_name = "Extractor"
 
     async def _build_TOC(self, docs):
-        self.callback(0.2,message="Start to generate table of content ...")
-        docs = sorted(docs, key=lambda d:(
-            d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
-            d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0)
-        ))
+        self.callback(0.2, message="Start to generate table of content ...")
+        docs = sorted(
+            docs,
+            key=lambda d: (
+                d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
+                d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0),
+            ),
+        )
         toc = await run_toc_from_text([d["text"] for d in docs], self.chat_mdl)
-        logging.info("------------ T O C -------------\n"+json.dumps(toc, ensure_ascii=False, indent='  '))
+        logging.info("------------ T O C -------------\n" + json.dumps(toc, ensure_ascii=False, indent="  "))
         ii = 0
         while ii < len(toc):
             try:
                 idx = int(toc[ii]["chunk_id"])
                 del toc[ii]["chunk_id"]
                 toc[ii]["ids"] = [docs[idx]["id"]]
-                if ii == len(toc) -1:
+                if ii == len(toc) - 1:
                     break
-                for jj in range(idx+1, int(toc[ii+1]["chunk_id"])+1):
+                for jj in range(idx + 1, int(toc[ii + 1]["chunk_id"]) + 1):
                     toc[ii]["ids"].append(docs[jj]["id"])
             except Exception as e:
                 logging.exception(e)
@@ -71,6 +82,20 @@ class Extractor(ProcessBase, LLM):
             return d
         return None
 
+    async def _knowledge_compile(self, docs):
+        """Sort ``docs`` by page number then top offset, compile them with the configured
+        ``knowledge_compilation`` settings, and return what ``merge_compiled_structures`` yields."""
+        tenant_id, doc_id = self._canvas.get_tenant_id(), self._canvas._doc_id
+        embedding_model = LLMBundle(tenant_id, LLMType.EMBEDDING, max_retries=self._param.max_retries, retry_interval=self._param.delay_after_error)
+        self.callback(0.2, message="Start to compile knowledge structure ...")
+
+        def _first(value):  # positional fields may arrive as a list; order by its first element
+            return value[0] if isinstance(value, list) else value
+
+        docs = sorted(docs, key=lambda d: (_first(d.get("page_num_int", 0)), _first(d.get("top_int", 0))))
+        docs = await compile_structure_from_text(docs, self._param.knowledge_compilation, self.chat_mdl, embedding_model, doc_id)
+        return await merge_compiled_structures(docs, self.chat_mdl, embedding_model, tenant_id, DocumentService.get_knowledgebase_id(doc_id))
+
     async def _invoke(self, **kwargs):
         self.set_output("output_format", "chunks")
         self.callback(random.randint(1, 5) / 100.0, "Start to generate.")
@@ -89,10 +114,17 @@ class Extractor(ProcessBase, LLM):
                 for ck in chunks:
                     ck["doc_id"] = self._canvas._doc_id
                     ck["id"] = xxhash.xxh64((ck["text"] + str(ck["doc_id"])).encode("utf-8")).hexdigest()
-                toc =await self._build_TOC(chunks)
+                toc = await self._build_TOC(chunks)
                 chunks.append(toc)
                 self.set_output("chunks", chunks)
                 return
+            if self._param.field_name in ["set", "list", "graph"]:
+                for ck in chunks:
+                    ck["doc_id"] = self._canvas._doc_id
+                    ck["id"] = xxhash.xxh64((ck["text"] + str(ck["doc_id"])).encode("utf-8")).hexdigest()
+                await self._knowledge_compile(chunks)
+                self.set_output("chunks", chunks)
+                return
 
             prog = 0
             for i, ck in enumerate(chunks):
@@ -100,12 +132,11 @@ class Extractor(ProcessBase, LLM):
                 msg, sys_prompt = self._sys_prompt_and_msg([], args)
                 msg.insert(0, {"role": "system", "content": sys_prompt})
                 ck[self._param.field_name] = await self._generate_async(msg)
-                prog += 1./len(chunks)
-                if i % (len(chunks)//100+1) == 1:
-                    self.callback(prog, f"{i+1} / {len(chunks)}")
+                prog += 1.0 / len(chunks)
+                if i % (len(chunks) // 100 + 1) == 1:
+                    self.callback(prog, f"{i + 1} / {len(chunks)}")
             self.set_output("chunks", chunks)
         else:
             msg, sys_prompt = self._sys_prompt_and_msg([], args)
             msg.insert(0, {"role": "system", "content": sys_prompt})
             self.set_output("chunks", [{self._param.field_name: await self._generate_async(msg)}])
-
diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index 3a702f3bc5..0f97a1a537 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -31,7 +31,9 @@ from common import settings
 
 from common.misc_utils import thread_pool_exec
 
-def index_name(uid): return f"ragflow_{uid}"
+
+def index_name(uid):  # data-store index name for ``uid``; ``Dealer.retrieval`` calls this once per tenant id
+    return f"ragflow_{uid}"
 
 
 class Dealer:
@@ -54,11 +56,10 @@ class Dealer:
         qv, _ = await thread_pool_exec(emb_mdl.encode_queries, txt)
         shape = np.array(qv).shape
         if len(shape) > 1:
-            raise Exception(
-                f"Dealer.get_vector returned array's shape {shape} doesn't match expectation(exact one dimension).")
+            raise Exception(f"Dealer.get_vector returned array's shape {shape} doesn't match expectation(exact one dimension).")
         embedding_data = [get_float(v) for v in qv]
         vector_column_name = f"q_{len(embedding_data)}_vec"
-        return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity})
+        return MatchDenseExpr(vector_column_name, embedding_data, "float", "cosine", topk, {"similarity": similarity})
 
     async def _existing_doc_ids(self, doc_ids: list[str]) -> set[str]:
         if not doc_ids:
@@ -123,18 +124,14 @@ class Dealer:
             if key in req and req[key] is not None:
                 condition[field] = req[key]
         # TODO(yzc): `available_int` is nullable however infinity doesn't support nullable columns.
-        for key in ["knowledge_graph_kwd", "available_int", "entity_kwd", "from_entity_kwd", "to_entity_kwd",
-                    "removed_kwd"]:
+        for key in ["id", "knowledge_graph_kwd", "available_int", "entity_kwd", "from_entity_kwd", "to_entity_kwd", "removed_kwd"]:
             if key in req and req[key] is not None:
                 condition[key] = req[key]
+        if isinstance(req.get("must_not"), dict):
+            condition["must_not"] = req["must_not"]
         return condition
 
-    async def search(self, req, idx_names: str | list[str],
-               kb_ids: list[str],
-               emb_mdl=None,
-               highlight: bool | list | None = None,
-               rank_feature: dict | None = None
-               ):
+    async def search(self, req, idx_names: str | list[str], kb_ids: list[str], emb_mdl=None, highlight: bool | list | None = None, rank_feature: dict | None = None):
         if highlight is None:
             highlight = False
 
@@ -146,11 +143,33 @@ class Dealer:
         ps = int(req.get("size", topk))
         offset, limit = pg * ps, ps
 
-        src = req.get("fields",
-                      ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd", "position_int",
-                       "doc_id", "chunk_order_int", "page_num_int", "top_int", "create_timestamp_flt", "knowledge_graph_kwd",
-                       "question_kwd", "question_tks", "doc_type_kwd",
-                       "available_int", "content_with_weight", "mom_id", PAGERANK_FLD, TAG_FLD, "row_id()"])
+        src = req.get(
+            "fields",
+            [
+                "docnm_kwd",
+                "content_ltks",
+                "kb_id",
+                "img_id",
+                "title_tks",
+                "important_kwd",
+                "position_int",
+                "doc_id",
+                "chunk_order_int",
+                "page_num_int",
+                "top_int",
+                "create_timestamp_flt",
+                "knowledge_graph_kwd",
+                "question_kwd",
+                "question_tks",
+                "doc_type_kwd",
+                "available_int",
+                "content_with_weight",
+                "mom_id",
+                PAGERANK_FLD,
+                TAG_FLD,
+                "row_id()",
+            ],
+        )
         kwds = set([])
 
         qst = req.get("question", "")
@@ -173,8 +192,7 @@ class Dealer:
             matchText, keywords = self.qryr.question(qst, min_match=0.3)
             if emb_mdl is None:
                 matchExprs = [matchText]
-                res = await thread_pool_exec(self.dataStore.search, src, highlightFields, filters, matchExprs, orderBy, offset, limit,
-                                            idx_names, kb_ids, rank_feature=rank_feature)
+                res = await thread_pool_exec(self.dataStore.search, src, highlightFields, filters, matchExprs, orderBy, offset, limit, idx_names, kb_ids, rank_feature=rank_feature)
                 total = self.dataStore.get_total(res)
                 logging.debug("Dealer.search TOTAL: {}".format(total))
             else:
@@ -192,8 +210,7 @@ class Dealer:
                 fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05,0.95"})
                 matchExprs = [matchText, matchDense, fusionExpr]
 
-                res = await thread_pool_exec(self.dataStore.search, src, highlightFields, filters, matchExprs, orderBy, offset, limit,
-                                            idx_names, kb_ids, rank_feature=rank_feature)
+                res = await thread_pool_exec(self.dataStore.search, src, highlightFields, filters, matchExprs, orderBy, offset, limit, idx_names, kb_ids, rank_feature=rank_feature)
                 total = self.dataStore.get_total(res)
                 logging.debug("Dealer.search TOTAL: {}".format(total))
 
@@ -205,9 +222,9 @@ class Dealer:
                     else:
                         matchText, _ = self.qryr.question(qst, min_match=0.1)
                         matchDense.extra_options["similarity"] = 0.17
-                        res = await thread_pool_exec(self.dataStore.search, src, highlightFields, filters, [matchText, matchDense, fusionExpr],
-                                                    orderBy, offset, limit, idx_names, kb_ids,
-                                                    rank_feature=rank_feature)
+                        res = await thread_pool_exec(
+                            self.dataStore.search, src, highlightFields, filters, [matchText, matchDense, fusionExpr], orderBy, offset, limit, idx_names, kb_ids, rank_feature=rank_feature
+                        )
                         total = self.dataStore.get_total(res)
                     logging.debug("Dealer.search 2 TOTAL: {}".format(total))
 
@@ -225,22 +242,13 @@ class Dealer:
         keywords = list(kwds)
         highlight = self.dataStore.get_highlight(res, keywords, "content_with_weight")
         aggs = self.dataStore.get_aggregation(res, "docnm_kwd")
-        return self.SearchResult(
-            total=total,
-            ids=ids,
-            query_vector=q_vec,
-            aggregation=aggs,
-            highlight=highlight,
-            field=self.dataStore.get_fields(res, src + ["_score"]),
-            keywords=keywords
-        )
+        return self.SearchResult(total=total, ids=ids, query_vector=q_vec, aggregation=aggs, highlight=highlight, field=self.dataStore.get_fields(res, src + ["_score"]), keywords=keywords)
 
     @staticmethod
     def trans2floats(txt):
         return [get_float(t) for t in txt.split("\t")]
 
-    def insert_citations(self, answer, chunks, chunk_v,
-                         embd_mdl, tkweight=0.1, vtweight=0.9):
+    def insert_citations(self, answer, chunks, chunk_v, embd_mdl, tkweight=0.1, vtweight=0.9):
         assert len(chunks) == len(chunk_v)
         if not chunks:
             return answer, set([])
@@ -256,13 +264,10 @@ class Dealer:
                         i += 1
                     if i < len(pieces):
                         i += 1
-                    pieces_.append("".join(pieces[st: i]) + "\n")
+                    pieces_.append("".join(pieces[st:i]) + "\n")
                 else:
                     # Sentence boundary regex includes Arabic punctuation (، ؛ ؟ ۔)
-                    pieces_.extend(
-                        re.split(
-                            r"([^\|][；。？!！،؛؟۔\n]|[a-z\u0600-\u06FF][.?;!،؛؟][ \n])",
-                            pieces[i]))
+                    pieces_.extend(re.split(r"([^\|][；。？!！،؛؟۔\n]|[a-z\u0600-\u06FF][.?;!،؛؟][ \n])", pieces[i]))
                     i += 1
             pieces = pieces_
         else:
@@ -287,30 +292,21 @@ class Dealer:
         for i in range(len(chunk_v)):
             if len(ans_v[0]) != len(chunk_v[i]):
                 chunk_v[i] = [0.0] * len(ans_v[0])
-                logging.warning(
-                    "The dimension of query and chunk do not match: {} vs. {}".format(len(ans_v[0]), len(chunk_v[i])))
+                logging.warning("The dimension of query and chunk do not match: {} vs. {}".format(len(ans_v[0]), len(chunk_v[i])))
 
-        assert len(ans_v[0]) == len(chunk_v[0]), "The dimension of query and chunk do not match: {} vs. {}".format(
-            len(ans_v[0]), len(chunk_v[0]))
+        assert len(ans_v[0]) == len(chunk_v[0]), "The dimension of query and chunk do not match: {} vs. {}".format(len(ans_v[0]), len(chunk_v[0]))
 
-        chunks_tks = [rag_tokenizer.tokenize(self.qryr.rmWWW(ck)).split()
-                      for ck in chunks]
+        chunks_tks = [rag_tokenizer.tokenize(self.qryr.rmWWW(ck)).split() for ck in chunks]
         cites = {}
         thr = 0.63
         while thr > 0.3 and len(cites.keys()) == 0 and pieces_ and chunks_tks:
             for i, a in enumerate(pieces_):
-                sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
-                                                                chunk_v,
-                                                                rag_tokenizer.tokenize(
-                                                                    self.qryr.rmWWW(pieces_[i])).split(),
-                                                                chunks_tks,
-                                                                tkweight, vtweight)
+                sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i], chunk_v, rag_tokenizer.tokenize(self.qryr.rmWWW(pieces_[i])).split(), chunks_tks, tkweight, vtweight)
                 mx = np.max(sim) * 0.99
                 logging.debug("{} SIM: {}".format(pieces_[i], mx))
                 if mx < thr:
                     continue
-                cites[idx[i]] = list(
-                    set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
+                cites[idx[i]] = list(set([str(ii) for ii in range(len(chunk_v)) if sim[ii] > mx]))[:4]
             thr *= 0.8
 
         res = ""
@@ -362,11 +358,9 @@ class Dealer:
                 rank_fea.append(0)
             else:
                 rank_fea.append(nor / np.sqrt(denor) / q_denor)
-        return np.array(rank_fea) * 10. + pageranks
+        return np.array(rank_fea) * 10.0 + pageranks
 
-    async def _knn_scores(self, sres: "Dealer.SearchResult",
-                          idx_names: str | list[str],
-                          kb_ids: list[str]) -> dict[str, float]:
+    async def _knn_scores(self, sres: "Dealer.SearchResult", idx_names: str | list[str], kb_ids: list[str]) -> dict[str, float]:
         """
         Second-pass ES call that returns the cosine similarity between the
         query embedding and each candidate chunk's embedding, filtered to the
@@ -399,10 +393,7 @@ class Dealer:
         )
         return self.dataStore.get_scores(res)
 
-    async def fetch_chunk_vectors(self, chunk_ids: list[str],
-                                  tenant_ids: str | list[str],
-                                  kb_ids: list[str],
-                                  dim: int) -> dict[str, list[float]]:
+    async def fetch_chunk_vectors(self, chunk_ids: list[str], tenant_ids: str | list[str], kb_ids: list[str], dim: int) -> dict[str, list[float]]:
         """
         Citation-time helper: fetch only the embedding vectors for an
         explicit set of chunk ids. Used by callers that need to compute
@@ -440,10 +431,7 @@ class Dealer:
             out[cid] = v
         return out
 
-    def rerank_with_knn(self, sres, query, knn_scores: dict[str, float],
-                        tkweight=0.3, vtweight=0.7,
-                        cfield="content_ltks",
-                        rank_feature: dict | None = None):
+    def rerank_with_knn(self, sres, query, knn_scores: dict[str, float], tkweight=0.3, vtweight=0.7, cfield="content_ltks", rank_feature: dict | None = None):
         """
         Merge ES-side KNN cosine similarity with locally computed term
         similarity using the user-configured weights. Replaces the older
@@ -465,16 +453,12 @@ class Dealer:
             ins_tw.append(tks)
 
         tksim = np.array(self.qryr.token_similarity(keywords, ins_tw), dtype=np.float64)
-        vtsim = np.array([knn_scores.get(chunk_id, 0.0) for chunk_id in sres.ids],
-                         dtype=np.float64)
+        vtsim = np.array([knn_scores.get(chunk_id, 0.0) for chunk_id in sres.ids], dtype=np.float64)
         rank_fea = self._rank_feature_scores(rank_feature, sres)
         sim = tkweight * tksim + vtweight * vtsim + rank_fea
         return sim, tksim, vtsim
 
-    def rerank(self, sres, query, tkweight=0.3,
-               vtweight=0.7, cfield="content_ltks",
-               rank_feature: dict | None = None
-               ):
+    def rerank(self, sres, query, tkweight=0.3, vtweight=0.7, cfield="content_ltks", rank_feature: dict | None = None):
         _, keywords = self.qryr.question(query)
         vector_size = len(sres.query_vector)
         vector_column = f"q_{vector_size}_vec"
@@ -503,16 +487,11 @@ class Dealer:
         ## For rank feature(tag_fea) scores.
         rank_fea = self._rank_feature_scores(rank_feature, sres)
 
-        sim, tksim, vtsim = self.qryr.hybrid_similarity(sres.query_vector,
-                                                        ins_embd,
-                                                        keywords,
-                                                        ins_tw, tkweight, vtweight)
+        sim, tksim, vtsim = self.qryr.hybrid_similarity(sres.query_vector, ins_embd, keywords, ins_tw, tkweight, vtweight)
 
         return sim + rank_fea, tksim, vtsim
 
-    def rerank_by_model(self, rerank_mdl, sres, query, tkweight=0.3,
-                        vtweight=0.7, cfield="content_ltks",
-                        rank_feature: dict | None = None):
+    def rerank_by_model(self, rerank_mdl, sres, query, tkweight=0.3, vtweight=0.7, cfield="content_ltks", rank_feature: dict | None = None):
         _, keywords = self.qryr.question(query)
 
         for i in sres.ids:
@@ -520,7 +499,7 @@ class Dealer:
                 sres.field[i]["important_kwd"] = [sres.field[i]["important_kwd"]]
         ins_tw = []
         for i in sres.ids:
-            #content_ltks = list(OrderedDict.fromkeys(sres.field[i][cfield].split()))
+            # content_ltks = list(OrderedDict.fromkeys(sres.field[i][cfield].split()))
             content_ltks = sres.field[i][cfield].split()
             title_tks = [t for t in sres.field[i].get("title_tks", "").split() if t]
             important_kwd = sres.field[i].get("important_kwd", [])
@@ -540,10 +519,7 @@ class Dealer:
         return tkweight * np.array(tksim) + vtweight * vtsim + rank_fea, tksim, vtsim
 
     def hybrid_similarity(self, ans_embd, ins_embd, ans, inst):
-        return self.qryr.hybrid_similarity(ans_embd,
-                                           ins_embd,
-                                           rag_tokenizer.tokenize(ans).split(),
-                                           rag_tokenizer.tokenize(inst).split())
+        return self.qryr.hybrid_similarity(ans_embd, ins_embd, rag_tokenizer.tokenize(ans).split(), rag_tokenizer.tokenize(inst).split())
 
     @staticmethod
     def _rerank_window(page_size: int, top: int = 0) -> int:
@@ -571,22 +547,22 @@ class Dealer:
         return window
 
     async def retrieval(
-            self,
-            question,
-            embd_mdl,
-            tenant_ids,
-            kb_ids,
-            page,
-            page_size,
-            similarity_threshold=0.2,
-            vector_similarity_weight=0.3,
-            top=1024,
-            doc_ids=None,
-            aggs=True,
-            rerank_mdl=None,
-            highlight=False,
-            rank_feature: dict | None = {PAGERANK_FLD: 10},
-            trace_id=None,
+        self,
+        question,
+        embd_mdl,
+        tenant_ids,
+        kb_ids,
+        page,
+        page_size,
+        similarity_threshold=0.2,
+        vector_similarity_weight=0.3,
+        top=1024,
+        doc_ids=None,
+        aggs=True,
+        rerank_mdl=None,
+        highlight=False,
+        rank_feature: dict | None = {PAGERANK_FLD: 10},
+        trace_id=None,
     ):
         ranks = {"total": 0, "chunks": [], "doc_aggs": {}}
         if not question:
@@ -617,8 +593,7 @@ class Dealer:
             tenant_ids = tenant_ids.split(",")
 
         idx_names = [index_name(tid) for tid in tenant_ids]
-        sres = await self.search(req, idx_names, kb_ids, embd_mdl, highlight,
-                           rank_feature=rank_feature)
+        sres = await self.search(req, idx_names, kb_ids, embd_mdl, highlight, rank_feature=rank_feature)
         # Temporary retrieval-side guard: prune chunks whose parent document no
         # longer exists before reranking and returning results.
         sres = await self._prune_deleted_chunks(sres)
@@ -628,8 +603,7 @@ class Dealer:
 
         term_similarity_weight = 1 - vector_similarity_weight
         logging.debug(
-            "[Search] retrieval weights: trace_id=%s kb_count=%s similarity_threshold=%s "
-            "vector_similarity_weight=%s full_text_weight=%s rerank_enabled=%s",
+            "[Search] retrieval weights: trace_id=%s kb_count=%s similarity_threshold=%s vector_similarity_weight=%s full_text_weight=%s rerank_enabled=%s",
             trace_id,
             len(kb_ids),
             similarity_threshold,
@@ -685,7 +659,7 @@ class Dealer:
             return ranks
 
         # Use stable sort for deterministic ordering when scores are tied
-        sorted_idx = np.argsort(sim_np * -1, kind='stable')
+        sorted_idx = np.argsort(sim_np * -1, kind="stable")
 
         # When vector_similarity_weight is 0, similarity_threshold is not meaningful for term-only scores.
         post_threshold = 0.0 if vector_similarity_weight <= 0 else similarity_threshold
@@ -774,12 +748,17 @@ class Dealer:
         tbl = self.dataStore.sql(sql, fetch_size, format)
         return tbl
 
-    def chunk_list(self, doc_id: str, tenant_id: str,
-                   kb_ids: list[str], max_count=1024,
-                   offset=0,
-                   fields=["docnm_kwd", "content_with_weight", "img_id"],
-                   sort_by_position: bool = False,
-                   retrieve_all: bool = False):
+    def chunk_list(
+        self,
+        doc_id: str,
+        tenant_id: str,
+        kb_ids: list[str],
+        max_count=1024,
+        offset=0,
+        fields=["docnm_kwd", "content_with_weight", "img_id"],
+        sort_by_position: bool = False,
+        retrieve_all: bool = False,
+    ):
         """Return chunks for a document.
 
         By default, preserve the historical max_count cap. When retrieve_all is
@@ -807,8 +786,7 @@ class Dealer:
             limit = bs if retrieve_all else min(bs, max_count - p)
             if limit <= 0:
                 break
-            es_res = self.dataStore.search(fields, [], condition, [], orderBy, p, limit, index_name(tenant_id),
-                                           kb_ids)
+            es_res = self.dataStore.search(fields, [], condition, [], orderBy, p, limit, index_name(tenant_id), kb_ids)
             dict_chunks = self.dataStore.get_fields(es_res, fields)
             for id, doc in dict_chunks.items():
                 doc["id"] = id
@@ -834,15 +812,13 @@ class Dealer:
 
     def tag_content(self, tenant_id: str, kb_ids: list[str], doc, all_tags, topn_tags=3, keywords_topn=30, S=1000):
         idx_nm = index_name(tenant_id)
-        match_txt = self.qryr.paragraph(doc["title_tks"] + " " + doc["content_ltks"], doc.get("important_kwd", []),
-                                        keywords_topn)
+        match_txt = self.qryr.paragraph(doc["title_tks"] + " " + doc["content_ltks"], doc.get("important_kwd", []), keywords_topn)
         res = self.dataStore.search([], [], {}, [match_txt], OrderByExpr(), 0, 0, idx_nm, kb_ids, ["tag_kwd"])
         aggs = self.dataStore.get_aggregation(res, "tag_kwd")
         if not aggs:
             return False
         cnt = np.sum([c for _, c in aggs])
-        tag_fea = sorted([(a, round(0.1 * (c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs],
-                         key=lambda x: x[1] * -1)[:topn_tags]
+        tag_fea = sorted([(a, round(0.1 * (c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags]
         doc[TAG_FLD] = {a.replace(".", "_"): c for a, c in tag_fea if c > 0}
         return True
 
@@ -857,12 +833,12 @@ class Dealer:
         if not aggs:
             return {}
         cnt = np.sum([c for _, c in aggs])
-        tag_fea = sorted([(a, round(0.1 * (c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs],
-                         key=lambda x: x[1] * -1)[:topn_tags]
+        tag_fea = sorted([(a, round(0.1 * (c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags]
         return {a.replace(".", "_"): max(1, c) for a, c in tag_fea}
 
     async def retrieval_by_toc(self, query: str, chunks: list[dict], tenant_ids: list[str], chat_mdl, topn: int = 6):
-        from rag.prompts.generator import relevant_chunks_with_toc # moved from the top of the file to avoid circular import
+        from rag.prompts.generator import relevant_chunks_with_toc  # moved from the top of the file to avoid circular import
+
         if not chunks:
             return []
         idx_nms = [index_name(tid) for tid in tenant_ids]
@@ -872,11 +848,9 @@ class Dealer:
                 ranks[ck["doc_id"]] = 0
             ranks[ck["doc_id"]] += ck["similarity"]
             doc_id2kb_id[ck["doc_id"]] = ck["kb_id"]
-        doc_id = sorted(ranks.items(), key=lambda x: x[1] * -1.)[0][0]
+        doc_id = sorted(ranks.items(), key=lambda x: x[1] * -1.0)[0][0]
         kb_ids = [doc_id2kb_id[doc_id]]
-        es_res = self.dataStore.search(["content_with_weight"], [], {"doc_id": doc_id, "toc_kwd": "toc"}, [],
-                                       OrderByExpr(), 0, 128, idx_nms,
-                                       kb_ids)
+        es_res = self.dataStore.search(["content_with_weight"], [], {"doc_id": doc_id, "toc_kwd": "toc"}, [], OrderByExpr(), 0, 128, idx_nms, kb_ids)
         toc = []
         dict_chunks = self.dataStore.get_fields(es_res, ["content_with_weight"])
         for _, doc in dict_chunks.items():
@@ -914,7 +888,7 @@ class Dealer:
                 "term_similarity": sim,
                 "vector": [0.0] * vector_size,
                 "positions": chunk.get("position_int", []),
-                "doc_type_kwd": chunk.get("doc_type_kwd", "")
+                "doc_type_kwd": chunk.get("doc_type_kwd", ""),
             }
             for k in chunk.keys():
                 if k[-4:] == "_vec":
@@ -951,7 +925,8 @@ class Dealer:
             if chunk is None:
                 logging.warning(
                     "Parent chunk '%s' not found in the index; falling back to %d child chunk(s).",
-                    id, len(cks),
+                    id,
+                    len(cks),
                 )
                 chunks.extend(cks)
                 continue
@@ -969,7 +944,7 @@ class Dealer:
                 "term_similarity": np.mean([ck["similarity"] for ck in cks]),
                 "vector": [0.0] * vector_size,
                 "positions": chunk.get("position_int", []),
-                "doc_type_kwd": chunk.get("doc_type_kwd", "")
+                "doc_type_kwd": chunk.get("doc_type_kwd", ""),
             }
             for k in cks[0].keys():
                 if k[-4:] == "_vec":
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index c3b720811b..a7e113b3fc 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -84,7 +84,7 @@ from common.versions import get_ragflow_version
 from api.db.db_models import close_connection
 from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one, audio, email, tag
 from rag.nlp import search, rag_tokenizer, add_positions
-from rag.raptor import (
+from rag.advanced_rag.knowlege_compile.raptor import (
     RAPTOR_TREE_BUILDER,
 )
 from common.token_utils import num_tokens_from_string, truncate
@@ -136,6 +136,8 @@ TASK_TYPE_TO_PIPELINE_TASK_TYPE = {
     "graphrag": PipelineTaskType.GRAPH_RAG,
     "mindmap": PipelineTaskType.MINDMAP,
     "memory": PipelineTaskType.MEMORY,
+    "artifact": PipelineTaskType.ARTIFACT,
+    "skill": PipelineTaskType.SKILL,
 }
 
 UNACKED_ITERATOR = None
@@ -250,6 +252,13 @@ async def collect():
 
     task_type = msg.get("task_type", "")
     task["task_type"] = task_type
+    # Per-doc fan-out task types (today: doc-scoped raptor) carry their
+    # participating doc id list on the Redis message but not on the DB
+    # row. The KB-scoped branch above already does this for FAKE doc
+    # tasks; mirror here so ``ctx.doc_ids`` is populated for the
+    # per-doc path too.
+    if "doc_ids" in msg and not task.get("doc_ids"):
+        task["doc_ids"] = msg.get("doc_ids", []) or []
     if task_type[:8] == "dataflow":
         task["tenant_id"] = msg["tenant_id"]
         task["dataflow_id"] = msg["dataflow_id"]
@@ -1060,7 +1069,7 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si
         """Run RAPTOR and append generated summary chunks for one doc id."""
         nonlocal tk_count, res
         logging.info("RAPTOR: using tree_builder=%s clustering_method=%s for doc %s", tree_builder, clustering_method, did)
-        from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor  # Lazy load, save around 8s
+        from rag.advanced_rag.knowlege_compile.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor  # Lazy load, save around 8s
 
         raptor = Raptor(
             raptor_config.get("max_cluster", 64),
@@ -1532,6 +1541,9 @@ async def do_handle_task(task):
         progress_callback(1, "place holder")
         pass
         return
+    elif task_type == "skill":
+        progress_callback(-1, "Skill generation requires the refactored task executor (TE_RUN_MODE=0).")
+        return
     else:
         # Standard chunking methods
         task["llm_id"] = doc_task_llm_id
@@ -1563,6 +1575,7 @@ async def do_handle_task(task):
         progress_message = "Embedding chunks ({:.2f}s)".format(timer() - start_ts)
         logging.info(progress_message)
         progress_callback(msg=progress_message)
+
         if task["parser_id"].lower() == "naive" and task["parser_config"].get("toc_extraction", False):
             toc_thread = asyncio.create_task(asyncio.to_thread(build_TOC, task, chunks, progress_callback))
 
@@ -1739,8 +1752,11 @@ async def handle_task():
     finally:
         if not task.get("dataflow_id", ""):
             referred_document_id = None
-            if task_type in ["graphrag", "raptor", "mindmap"]:
-                referred_document_id = task["doc_ids"][0]
+            if task_type in ["graphrag", "raptor", "mindmap", "artifact", "skill"]:
+                # KB-level fan-out tasks store the participating doc list in
+                # task["doc_ids"]; the first entry is used as a referent so
+                # the pipeline operation log has something to anchor to.
+                referred_document_id = (task.get("doc_ids") or [None])[0]
             ret = PipelineOperationLogService.record_pipeline_operation(
                 document_id=task["doc_id"], pipeline_id="", task_type=pipeline_task_type, task_id=task_id, referred_document_id=referred_document_id
             )
@@ -1871,7 +1887,6 @@ async def main():
           /____/
     """)
     logging.info(f"RAGFlow ingestion version: {get_ragflow_version()}")
-    logging.info(f"ENABLE_DRY_RUN_COMPARISON: {os.environ.get('ENABLE_DRY_RUN_COMPARISON', '0')}")
     show_configs()
     settings.init_settings()
     settings.check_and_install_torch()
diff --git a/rag/svr/task_executor_refactor/chunk_post_processor.py b/rag/svr/task_executor_refactor/chunk_post_processor.py
index d9f0f11bf9..1705988ed5 100644
--- a/rag/svr/task_executor_refactor/chunk_post_processor.py
+++ b/rag/svr/task_executor_refactor/chunk_post_processor.py
@@ -74,8 +74,7 @@ async def extract_keywords(docs: List[Dict], ctx: TaskContext) -> None:
 
         tasks = []
         for doc in docs:
-            tasks.append(
-                asyncio.create_task(doc_keyword_extraction(chat_model, doc, ctx.parser_config["auto_keywords"])))
+            tasks.append(asyncio.create_task(doc_keyword_extraction(chat_model, doc, ctx.parser_config["auto_keywords"])))
         try:
             await asyncio.gather(*tasks, return_exceptions=False)
         except Exception as e:
@@ -116,8 +115,7 @@ async def generate_questions(docs: List[Dict], ctx: TaskContext) -> None:
 
         tasks = []
         for doc in docs:
-            tasks.append(
-                asyncio.create_task(doc_question_proposal(chat_model, doc, ctx.parser_config["auto_questions"])))
+            tasks.append(asyncio.create_task(doc_question_proposal(chat_model, doc, ctx.parser_config["auto_questions"])))
         try:
             await asyncio.gather(*tasks, return_exceptions=False)
         except Exception as e:
@@ -184,18 +182,14 @@ async def generate_metadata(docs: List[Dict], ctx: TaskContext) -> None:
         metadata_conf = build_metadata_config(ctx.parser_config)
 
         async def gen_metadata_task(chat_mdl, d):
-            cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata",
-                                   metadata_conf)
+            cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata", metadata_conf)
             if not cached:
                 if ctx.has_canceled_func(ctx.id):
                     ctx.progress_cb(-1, msg="Task has been canceled.")
                     return
                 async with chat_limiter:
-                    cached = await gen_metadata(chat_mdl,
-                                                turn2jsonschema(metadata_conf),
-                                                d["content_with_weight"])
-                set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata",
-                              metadata_conf)
+                    cached = await gen_metadata(chat_mdl, turn2jsonschema(metadata_conf), d["content_with_weight"])
+                set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", metadata_conf)
             if cached:
                 d["metadata_obj"] = cached
 
@@ -256,8 +250,7 @@ async def apply_tags(docs: List[Dict], ctx: TaskContext) -> None:
             if ctx.has_canceled_func(ctx.id):
                 ctx.progress_cb(-1, msg="Task has been canceled.")
                 return
-            if settings.retriever.tag_content(tenant_id, kb_ids, doc, all_tags, topn_tags=topn_tags, S=S) and len(
-                    doc.get(TAG_FLD, [])) > 0:
+            if settings.retriever.tag_content(tenant_id, kb_ids, doc, all_tags, topn_tags=topn_tags, S=S) and len(doc.get(TAG_FLD, [])) > 0:
                 examples.append({"content": doc["content_with_weight"], TAG_FLD: doc[TAG_FLD]})
             else:
                 docs_to_tag.append(doc)
@@ -270,7 +263,7 @@ async def apply_tags(docs: List[Dict], ctx: TaskContext) -> None:
                     return
                 picked_examples = random.choices(examples, k=2) if len(examples) > 2 else examples
                 if not picked_examples:
-                    picked_examples.append({"content": "This is an example", TAG_FLD: {'example': 1}})
+                    picked_examples.append({"content": "This is an example", TAG_FLD: {"example": 1}})
                 async with chat_limiter:
                     cached = await content_tagging(
                         chat_mdl,
@@ -310,3 +303,883 @@ def count_with_key(docs: List[Dict], key: str) -> int:
         Count of docs that have the key.
     """
     return sum(1 for d in docs if d.get(key))
+
+
+# =====================================================================
+# Document post-chunking pipeline
+# ---------------------------------------------------------------------
+# Extracted from ``task_handler`` to keep the handler class small.
+# The public entry point is :func:`run_document_post_chunking_if_last`;
+# everything below is called (transitively) from there:
+#   run_document_post_chunking_if_last
+#     ├─ run_document_structure_compile
+#     │    ├─ run_tree_templates
+#     │    │    ├─ load_chunks_with_vec
+#     │    │    ├─ rechunk_doc_by_tree
+#     │    │    └─ raptor_tree_to_graph
+#     │    └─ (streaming compile via chat models per template)
+#     └─ handler._run_raptor       ← stays on the handler
+#
+# Functions that need handler state take ``handler`` (``TaskHandler``) as
+# their first arg so they can reach its ``_task_context``, ``_run_raptor``
+# and ``_load_chunks_for_doc`` without a circular import.
+# =====================================================================
+
+import numpy as np  # noqa: E402
+from typing import Callable, Optional  # noqa: E402
+
+from common.exceptions import TaskCanceledException  # noqa: E402
+from common.misc_utils import thread_pool_exec  # noqa: E402
+from common.token_utils import num_tokens_from_string  # noqa: E402
+from rag.nlp import search  # noqa: E402
+from api.apps.restful_apis.chunk_api import _compilation_template_kind  # noqa: E402
+from api.db.services.document_service import DocumentService  # noqa: E402
+from api.db.services.compilation_template_service import (  # noqa: E402
+    CompilationTemplateService,
+)
+from api.db.services.compilation_template_group_service import (  # noqa: E402
+    CompilationTemplateGroupService,
+)
+from api.db.services.task_service import (  # noqa: E402
+    abort_doc_chunking_counter,
+    clear_doc_chunking_counter,
+    credit_doc_chunking_task,
+    is_doc_chunking_aborted,
+)
+from rag.advanced_rag.knowlege_compile.structure import (  # noqa: E402
+    CHAIN_KINDS,
+    compile_structure_from_text,
+    merge_compiled_structures,
+    validate_and_correct_chain,
+)
+
+
+# ----- tunables ------------------------------------------------------
+# Bound how many source chunks are handed to a single
+# ``compile_structure_from_text`` invocation. The call fans them out
+# across max_workers internally, so a moderate window keeps memory +
+# LLM-context pressure predictable for long docs.
+DOC_STRUCTURE_COMPILE_BATCH_CHUNKS = 4
+
+# Bound how many compiled ES-ready docs may accumulate before we flush
+# them through ``merge_compiled_structures``. The merger does pairwise
+# cosine + LLM duplicate-judging, so it's the more expensive step; we
+# cap the per-flush set to keep the local-dedup buckets tractable.
+DOC_STRUCTURE_MERGE_MAX_DOCS = 512
+
+# Hard timeout, in seconds, on the chain-validator LLM correction step.
+# ``list`` and ``timeline`` kinds run it just before each merge flush; a
+# correction that takes longer is treated as a blocked LLM and the
+# uncorrected docs are flushed instead.
+STRUCTURE_CHAIN_CORRECTION_TIMEOUT_S = 120.0
+
+
+# ----- parser_config helpers -----------------------------------------
+# Duplicated from ``task_handler`` so this module stays free of a
+# reverse import (task_handler → this module via dispatch; the other
+# direction would be circular).
+
+
+def _parser_config_compilation_template_group_ids(parser_config) -> list[str]:
+    """Return the stripped, de-duplicated template-group ids named by ``parser_config``.
+
+    A top-level ``compilation_template_group_id`` key takes precedence over the one under ``ext``.
+    """
+    key = "compilation_template_group_id"
+
+    def _clean(value) -> list[str]:
+        # A bare string counts as a one-item list; any other non-list yields no ids.
+        candidates = [value] if isinstance(value, str) else value
+        if not isinstance(candidates, list):
+            return []
+        stripped = [c.strip() for c in candidates if isinstance(c, str)]
+        # dict.fromkeys drops repeats while keeping first-seen order.
+        return [gid for gid in dict.fromkeys(stripped) if gid]
+
+    if not isinstance(parser_config, dict):
+        return []
+    if key in parser_config:
+        return _clean(parser_config.get(key))
+    # Only consult ``ext`` when the top-level key is absent altogether.
+    nested = parser_config.get("ext")
+    if not isinstance(nested, dict):
+        return []
+    return _clean(nested.get(key))
+
+
+def _parser_config_compilation_template_ids(parser_config, tenant_id: str) -> list[str]:
+    """Expand the configured template groups into a flat list of unique template ids.
+
+    Groups are resolved in config order; a template id keeps the position of its first occurrence.
+    """
+    ordered: dict[str, None] = {}  # used as an insertion-ordered set
+    group_ids = _parser_config_compilation_template_group_ids(parser_config)
+    for gid in group_ids:
+        resolved = CompilationTemplateGroupService.resolve_template_ids(gid, tenant_id)
+        for tid in resolved:
+            # setdefault leaves an already-seen id where it first appeared.
+            ordered.setdefault(tid, None)
+    return list(ordered)
+
+
+def _resolve_template_chat_llm_id(parser_cfg: dict, ctx) -> str:
+    """Choose which chat model a knowledge-compilation template should use.
+
+    The first non-blank string ``llm_id`` wins, looking at the template config,
+    then the document's ``ctx.parser_config``, and finally ``ctx.llm_id`` as-is.
+    """
+    if isinstance(parser_cfg, dict):
+        template_choice = parser_cfg.get("llm_id")
+        if isinstance(template_choice, str) and (cleaned := template_choice.strip()):
+            return cleaned
+    doc_level = getattr(ctx, "parser_config", None) or {}
+    if isinstance(doc_level, dict):
+        doc_choice = doc_level.get("llm_id")
+        if isinstance(doc_choice, str) and (cleaned := doc_choice.strip()):
+            return cleaned
+    return ctx.llm_id
+
+
+# ----- progress helper -----------------------------------------------
+
+
+def cap_done_progress(progress_cb: Callable) -> Callable:
+    """Return a wrapper around ``progress_cb`` that reports ``0.99`` in place of any
+    numeric (non-bool) progress ``>= 1``, whether passed positionally or as ``prog=``."""
+
+    def _is_done(value) -> bool:
+        # bool is an int subclass, so it has to be excluded explicitly.
+        return isinstance(value, (int, float)) and not isinstance(value, bool) and value >= 1
+
+    def capped_progress(*args, **kwargs):
+        positional = list(args)
+        if positional and _is_done(positional[0]):
+            positional[0] = 0.99
+        if "prog" in kwargs and _is_done(kwargs["prog"]):
+            kwargs["prog"] = 0.99
+        # Everything else is forwarded to the wrapped callback untouched.
+        return progress_cb(*positional, **kwargs)
+
+    return capped_progress
+
+
+# ----- tree helpers --------------------------------------------------
+
+
+def raptor_tree_to_graph(tree: Dict) -> Dict:
+    """Flatten a nested RAPTOR tree into ``{"entities": [...], "relations": [...]}``.
+
+    Nodes are visited depth-first, parent before children. A node's ``title`` is both its
+    entity name and the endpoint of the ``child`` relation linking it to its parent."""
+    graph: Dict = {"entities": [], "relations": []}
+
+    def _visit(current: dict, parent_name: Optional[str]) -> None:
+        if not isinstance(current, dict):
+            return  # non-dict entries are ignored
+        name = current.get("title") or ""
+        entity: dict = dict(
+            name=name,
+            type="tree_node",
+            description=current.get("description", name),
+            mention_count=1,
+        )
+        chunk_ids = current.get("source_chunk_ids")
+        if isinstance(chunk_ids, list) and chunk_ids:
+            # Keep only non-empty string ids.
+            entity["source_chunk_ids"] = [cid for cid in chunk_ids if isinstance(cid, str) and cid]
+        graph["entities"].append(entity)
+        if parent_name is not None:
+            graph["relations"].append({"from": parent_name, "to": name, "type": "child"})
+        for kid in current.get("children") or []:
+            _visit(kid, name)
+
+    _visit(tree, None)
+    return graph
+
+
+async def load_chunks_with_vec(
+    tenant_id: str,
+    kb_id: str,
+    doc_id: str,
+    vctr_nm: str,
+) -> list[tuple[str, "np.ndarray", str]]:
+    """Load ``(text, float32 vector, chunk_id)`` tuples for ``doc_id``'s chunks, 500 rows per page, sorted by
+    ``page_num_int`` then ``top_int``. Rows with ``compile_kwd`` set, no text, or no usable vector are skipped.
+    Returns ``[]`` when the index does not exist; a failed page is logged and paging stops with the rows so far.
+    Per the original author, the tuple shape is the one ``RaptorService.build_doc_tree`` expects."""
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    index_nm = search.index_name(tenant_id)
+    if not settings.docStoreConn.index_exist(index_nm, kb_id):
+        return []
+    select_fields = ["id", "doc_id", "content_with_weight", vctr_nm]  # vctr_nm is the caller-supplied vector field
+    order_by = OrderByExpr()
+    order_by.asc("page_num_int")
+    order_by.asc("top_int")
+
+    out: list[tuple[str, "np.ndarray", str]] = []
+    offset = 0
+    PAGE = 500  # rows requested per search call
+    while True:
+        try:
+            res = await thread_pool_exec(  # the store search is dispatched via thread_pool_exec and awaited
+                settings.docStoreConn.search,
+                select_fields,
+                [],
+                {"doc_id": [doc_id], "available_int": 1},  # only this doc's rows with available_int == 1
+                [],
+                order_by,
+                offset,
+                PAGE,
+                index_nm,
+                [kb_id],
+            )
+            field_map = settings.docStoreConn.get_fields(res, select_fields)
+        except Exception:
+            logging.exception(
+                "tree-template: failed to load chunks for doc=%s",
+                doc_id,
+            )
+            break  # best-effort: keep whatever earlier pages produced
+        if not field_map:
+            break  # no rows on this page
+        for row_id, row in field_map.items():
+            if row.get("compile_kwd"):  # NOTE(review): compile_kwd is not in select_fields; confirm get_fields still returns it
+                continue
+            text = row.get("content_with_weight") or ""
+            vec = row.get(vctr_nm)
+            if not text or vec is None:
+                continue  # a chunk needs both text and a vector to be returned
+            try:
+                arr = np.asarray(vec, dtype=np.float32)
+            except Exception:
+                continue  # vector could not be converted to float32; drop the row
+            if arr.size == 0:
+                continue
+            out.append((text, arr, str(row_id)))
+        if len(field_map) < PAGE:
+            break  # a short page is treated as the last one
+        offset += PAGE
+    return out
+
+
+async def rechunk_doc_by_tree(
+    handler,
+    tree: dict,
+    template_id: str,
+    embedding_model,
+) -> None:
+    """Merge each leaf cluster's source chunks into one replacement chunk.
+
+    A leaf cluster is a node whose children all have no children of their own.
+    Its source chunks are joined in page/top order, embedded and inserted as a
+    new chunk; the cluster node and its children in ``tree`` then get their
+    ``source_chunk_ids`` rewritten in place to that new id, and the originals
+    are soft-deleted (``available_int=0``) with ``superseded_by_chunk_id`` set.
+
+    Args:
+        handler: Task handler; only ``handler._task_context`` is read.
+        tree: Tree dict to scan; mutated in place for merged clusters.
+        template_id: Stored on merged rows as ``rechunked_from_template_id``.
+        embedding_model: Object whose ``encode(texts)`` returns ``(vectors, _)``.
+
+    Load, embedding and insert failures are logged and abort the pass without
+    raising; soft-delete failures are logged per chunk. Clusters whose merged
+    row was not kept for insertion are left untouched, originals included.
+    """
+    from datetime import datetime
+    from common.misc_utils import get_uuid
+
+    ctx = handler._task_context
+
+    # id(node) -> (node, ordered unique source chunk ids), one entry per leaf cluster.
+    cluster_id_map: dict[int, tuple[dict, list[str]]] = {}
+
+    def _is_terminal(node: object) -> bool:
+        return isinstance(node, dict) and not (node.get("children") or [])
+
+    def _walk(node: object) -> None:
+        if not isinstance(node, dict):
+            return
+        children = node.get("children") or []
+        if children and all(_is_terminal(c) for c in children):
+            # Children's ids first, then the node's own, de-duplicated in order.
+            src_ids: list[str] = []
+            seen: set[str] = set()
+            for owner in (*children, node):
+                for cid in owner.get("source_chunk_ids") or []:
+                    if isinstance(cid, str) and cid and cid not in seen:
+                        seen.add(cid)
+                        src_ids.append(cid)
+            if src_ids:
+                cluster_id_map[id(node)] = (node, src_ids)
+        else:
+            for c in children:
+                _walk(c)
+
+    def _sort_key(c: dict) -> tuple:
+        # Reading order: lowest page, then lowest top offset, then id as tie-break.
+        pages = c.get("page_num_int") or [0]
+        tops = c.get("top_int") or [0]
+        return (min(pages), min(tops), c.get("id") or "")
+
+    _walk(tree)
+    if not cluster_id_map:
+        return
+
+    all_source_ids = sorted({sid for _, ids in cluster_id_map.values() for sid in ids})
+
+    from common.doc_store.doc_store_base import OrderByExpr
+
+    index_nm = search.index_name(ctx.tenant_id)
+    if not settings.docStoreConn.index_exist(index_nm, ctx.kb_id):
+        return
+
+    # The vector field name embeds the embedding size, probed with a dummy encode.
+    vctr_nm = "q_%d_vec" % len(embedding_model.encode(["x"])[0][0])
+    select_fields = [
+        "id",
+        "doc_id",
+        "kb_id",
+        "content_with_weight",
+        "page_num_int",
+        "top_int",
+        "position_int",
+        "docnm_kwd",
+        "title_tks",
+        "title_sm_tks",
+        "available_int",
+    ]
+    try:
+        res = await thread_pool_exec(
+            settings.docStoreConn.search,
+            select_fields,
+            [],
+            {"id": all_source_ids, "available_int": 1},
+            [],
+            OrderByExpr(),
+            0,
+            len(all_source_ids) + 16,
+            index_nm,
+            [ctx.kb_id],
+        )
+        field_map = settings.docStoreConn.get_fields(res, select_fields)
+    except Exception:
+        logging.exception("rechunk: failed to load source chunks for doc=%s template=%s", ctx.doc_id, template_id)
+        return
+    if not field_map:
+        return
+
+    # Normalise row ids to str and carry the id inside each row.
+    chunks_by_id: dict[str, dict] = {str(rid): {**row, "id": str(rid)} for rid, row in field_map.items()}
+
+    merged_rows: list[dict] = []
+    cluster_new_id: dict[int, str] = {}
+
+    for node_id_int, (_node, src_ids) in cluster_id_map.items():
+        cluster_chunks = [chunks_by_id[c] for c in src_ids if c in chunks_by_id]
+        if not cluster_chunks:
+            continue
+        cluster_chunks.sort(key=_sort_key)
+
+        merged_content = "\n\n".join((c.get("content_with_weight") or "") for c in cluster_chunks).strip()
+        if not merged_content:
+            continue
+        page_union = sorted({p for c in cluster_chunks for p in (c.get("page_num_int") or [])})
+        top_union = sorted({t for c in cluster_chunks for t in (c.get("top_int") or [])})
+
+        # Inherit the first chunk's fields, then override what the merge changes.
+        base = dict(cluster_chunks[0])
+        new_id = get_uuid()
+        cluster_new_id[node_id_int] = new_id
+
+        base.update(
+            {
+                "id": new_id,
+                "content_with_weight": merged_content,
+                "content_ltks": rag_tokenizer.tokenize(merged_content),
+                "page_num_int": page_union,
+                "top_int": top_union,
+                "available_int": 1,
+                "rechunk_kwd": "tree",
+                "rechunked_from_template_id": template_id,
+                "rechunked_from_chunk_ids": [c.get("id") for c in cluster_chunks if c.get("id")],
+                "token_num": num_tokens_from_string(merged_content),
+                "create_time": str(datetime.now()).replace("T", " ")[:19],
+                "create_timestamp_flt": datetime.now().timestamp(),
+            }
+        )
+        base["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(base["content_ltks"])
+        merged_rows.append(base)
+
+    if not merged_rows:
+        return
+
+    contents = [r["content_with_weight"] for r in merged_rows]
+    try:
+        vectors, _ = embedding_model.encode(contents)
+    except Exception:
+        logging.exception("rechunk: embedding failed for doc=%s template=%s", ctx.doc_id, template_id)
+        return
+    for row, vec in zip(merged_rows, vectors):
+        try:
+            row[vctr_nm] = np.asarray(vec, dtype=np.float32).tolist()
+        except Exception:
+            logging.exception("rechunk: vector cast failed; skipping row %s", row.get("id"))
+            row[vctr_nm] = None
+    merged_rows = [r for r in merged_rows if r.get(vctr_nm) is not None]
+    if not merged_rows:
+        return
+    # Forget clusters whose merged row was dropped above; otherwise the tree
+    # rewrite and soft-delete below would reference a chunk that is never
+    # inserted and the originals would vanish from retrieval with no successor.
+    kept_ids = {r["id"] for r in merged_rows}
+    cluster_new_id = {k: v for k, v in cluster_new_id.items() if v in kept_ids}
+
+    # NOTE(review): insert's return value is ignored; if the store reports
+    # per-row errors that way, check it before retiring the originals.
+    try:
+        await thread_pool_exec(
+            settings.docStoreConn.insert,
+            merged_rows,
+            index_nm,
+            ctx.kb_id,
+        )
+    except Exception:
+        logging.exception("rechunk: insert failed for doc=%s template=%s", ctx.doc_id, template_id)
+        return
+
+    # Point each merged cluster node and its leaves at the replacement chunk.
+    for node_id_int, new_chunk_id in cluster_new_id.items():
+        node, _ = cluster_id_map[node_id_int]
+        node["source_chunk_ids"] = [new_chunk_id]
+        for child in node.get("children") or []:
+            if isinstance(child, dict):
+                child["source_chunk_ids"] = [new_chunk_id]
+
+    # Retire the originals one by one; a failed update is logged and skipped.
+    for node_id_int, new_chunk_id in cluster_new_id.items():
+        _, src_ids = cluster_id_map[node_id_int]
+        for cid in src_ids:
+            try:
+                await thread_pool_exec(
+                    settings.docStoreConn.update,
+                    {"id": cid},
+                    {
+                        "available_int": 0,
+                        "superseded_by_chunk_id": new_chunk_id,
+                    },
+                    index_nm,
+                    ctx.kb_id,
+                )
+            except Exception:
+                logging.exception("rechunk: soft-delete failed for chunk=%s (merged=%s)", cid, new_chunk_id)
+
+
+async def run_tree_templates(
+    handler,
+    templates: list[tuple[str, dict]],
+    chat_mdl_by_tid: dict[str, "LLMBundle"],
+    embedding_model,
+) -> None:
+    """Build and persist one RAPTOR tree per ``(template_id, config)`` pair
+    for the current doc: ``RaptorService.build_doc_tree`` -> optional
+    ``rechunk_doc_by_tree`` -> ``_struct_upsert_graph_json`` -> best-effort
+    ``upsert_dataset_nav_doc``. Failures in those steps are logged, not raised."""
+    from rag.svr.task_executor_refactor.raptor_service import RaptorService
+    from rag.advanced_rag.knowlege_compile.structure import _struct_upsert_graph_json
+
+    ctx = handler._task_context
+    progress_cb = ctx.progress_cb
+
+    try:
+        doc_id = ctx.doc_id
+    except Exception:
+        doc_id = getattr(ctx, "_task", {}).get("doc_id") if hasattr(ctx, "_task") else None
+    if not doc_id:
+        logging.warning("tree-template: no doc_id on task context; skipping")
+        return
+
+    vctr_nm = "q_%d_vec" % len(embedding_model.encode(["x"])[0][0])  # dummy encode just to read the embedding width
+    chunks = await load_chunks_with_vec(
+        ctx.tenant_id,
+        ctx.kb_id,
+        doc_id,
+        vctr_nm,
+    )
+    if not chunks:
+        progress_cb(msg=f"tree-template: doc {doc_id} has no chunks; skipping")
+        return
+
+    raptor_service = RaptorService(ctx)
+
+    for idx, (template_id, parser_cfg) in enumerate(templates):
+        raptor_cfg = (parser_cfg or {}).get("raptor") or {}
+        raptor_config = {
+            "prompt": raptor_cfg.get("prompt") or "Please write a concise summary of the following texts:\n{cluster_content}",
+            "max_token": int(raptor_cfg.get("max_token") or 512),
+            "threshold": float(raptor_cfg.get("threshold") or 0.1),
+            "random_seed": int(raptor_cfg.get("random_seed") or 0),
+            "max_cluster": int(raptor_cfg.get("max_cluster") or 64),
+            "ext": raptor_cfg.get("ext") or {},
+        }
+        progress_cb(
+            msg=f"tree-template ({idx + 1}/{len(templates)}): building tree for doc={doc_id}",
+        )
+        try:
+            tree = await raptor_service.build_doc_tree(
+                chunks=chunks,
+                raptor_config=raptor_config,
+                chat_mdl=chat_mdl_by_tid[template_id],
+                embd_mdl=embedding_model,
+                tree_builder="raptor",
+                clustering_method="gmm",
+                max_errors=3,
+            )
+        except Exception:
+            logging.exception(
+                "tree-template %s: RAPTOR build failed for doc %s",
+                template_id,
+                doc_id,
+            )
+            continue
+        if tree is None:
+            logging.info(
+                "tree-template %s: no tree produced for doc %s",
+                template_id,
+                doc_id,
+            )
+            continue
+
+        if bool((raptor_cfg or {}).get("rechunk")):  # opt-in per template via raptor.rechunk
+            try:
+                await rechunk_doc_by_tree(
+                    handler=handler,
+                    tree=tree,
+                    template_id=template_id,
+                    embedding_model=embedding_model,
+                )
+            except Exception:
+                logging.exception(
+                    "tree-template %s: re-chunking failed for doc %s; persisting tree with original chunk ids",
+                    template_id,
+                    doc_id,
+                )
+
+        graph = raptor_tree_to_graph(tree)
+        try:
+            await _struct_upsert_graph_json(
+                graph,
+                ctx.tenant_id,
+                ctx.kb_id,
+                doc_id,
+                compile_kwd="tree",
+                compilation_template_id=template_id,
+            )
+        except Exception:
+            logging.exception(
+                "tree-template %s: graph upsert failed for doc %s",
+                template_id,
+                doc_id,
+            )
+            continue
+
+        try:
+            from rag.advanced_rag.knowlege_compile.dataset_nav import (
+                upsert_dataset_nav_doc,
+            )
+
+            await upsert_dataset_nav_doc(
+                ctx.tenant_id,
+                ctx.kb_id,
+                doc_id,
+                tree,
+            )
+        except Exception:
+            logging.exception(
+                "tree-template %s: dataset_nav upsert failed for doc %s",
+                template_id,
+                doc_id,
+            )
+
+        progress_cb(
+            msg=f"tree-template ({idx + 1}/{len(templates)}): persisted {len(graph['entities'])} node(s), {len(graph['relations'])} edge(s) for doc {doc_id}",
+        )
+
+
+async def run_document_structure_compile(handler, embedding_model: LLMBundle) -> None:
+    """Run document-scoped knowledge compilation for non-artifact
+    templates. ``tree``-kind templates go to :func:`run_tree_templates`;
+    the rest stream the doc's chunks (``handler._load_chunks_for_doc``),
+    compile every batch per template, and flush each accumulator through
+    ``merge_compiled_structures`` at :data:`DOC_STRUCTURE_MERGE_MAX_DOCS`.
+    """
+    ctx = handler._task_context
+    template_ids = _parser_config_compilation_template_ids(ctx.parser_config, ctx.tenant_id)
+    if not template_ids:
+        return
+
+    active_templates: list[tuple[str, dict]] = []
+    for template_id in template_ids:
+        template = CompilationTemplateService.get_saved(template_id, ctx.tenant_id)
+        if not template:
+            logging.warning(
+                "document_structure_compile: template %s not found",
+                template_id,
+            )
+            continue
+        parser_cfg = template.get("config") or {}
+        if not isinstance(parser_cfg, dict):
+            logging.warning(
+                "document_structure_compile: template %s config is invalid",
+                template_id,
+            )
+            continue
+        kind = _compilation_template_kind(parser_cfg.get("kind"))
+        if not kind or kind == "artifacts":  # artifact templates are skipped in this pass
+            continue
+        active_templates.append((template_id, parser_cfg))
+
+    if not active_templates:
+        return
+
+    llm_bundle_cache: dict[str, LLMBundle] = {}
+    chat_mdl_by_tid: dict[str, LLMBundle] = {}
+    filtered_templates: list[tuple[str, dict]] = []
+    for template_id, parser_cfg in active_templates:
+        chat_llm_id = _resolve_template_chat_llm_id(parser_cfg, ctx)
+        if chat_llm_id not in llm_bundle_cache:  # one bundle per distinct chat model id
+            try:
+                cfg = get_model_config_from_provider_instance(
+                    ctx.tenant_id,
+                    LLMType.CHAT,
+                    chat_llm_id,
+                )
+                llm_bundle_cache[chat_llm_id] = LLMBundle(
+                    ctx.tenant_id,
+                    cfg,
+                    lang=ctx.language,
+                )
+            except Exception:
+                logging.exception(
+                    "document_structure_compile: cannot resolve chat model %s for template %s; skipping",
+                    chat_llm_id,
+                    template_id,
+                )
+                continue
+        chat_mdl_by_tid[template_id] = llm_bundle_cache[chat_llm_id]
+        filtered_templates.append((template_id, parser_cfg))
+
+    if not filtered_templates:
+        return
+    active_templates = filtered_templates
+
+    tree_templates: list[tuple[str, dict]] = []
+    non_tree_templates: list[tuple[str, dict]] = []
+    for tid, cfg in active_templates:
+        if _compilation_template_kind((cfg or {}).get("kind")) == "tree":
+            tree_templates.append((tid, cfg))
+        else:
+            non_tree_templates.append((tid, cfg))
+
+    if tree_templates:
+        await run_tree_templates(
+            handler,
+            tree_templates,
+            chat_mdl_by_tid,
+            embedding_model,
+        )
+
+    if not non_tree_templates:
+        return
+    active_templates = non_tree_templates
+
+    progress_cb = ctx.progress_cb
+    total = len(active_templates)
+
+    accumulators: dict[str, list[dict]] = {tid: [] for tid, _ in active_templates}
+    template_kinds: dict[str, str] = {tid: _compilation_template_kind((cfg or {}).get("kind")) for tid, cfg in active_templates}
+    agg_infos: dict[str, dict] = {tid: {"inserted": 0, "updated": 0, "duplicates_dropped": 0} for tid, _ in active_templates}
+    chunks_by_id: dict[str, str] = {}  # chunk id -> text, handed to validate_and_correct_chain
+
+    async def _flush(template_id: str) -> None:
+        acc = accumulators[template_id]
+        if not acc:
+            return
+        kind = template_kinds.get(template_id, "")
+        if kind in CHAIN_KINDS:
+            try:
+                acc = await asyncio.wait_for(
+                    validate_and_correct_chain(
+                        acc,
+                        chunks_by_id,
+                        chat_mdl_by_tid[template_id],
+                        kind,
+                        callback=progress_cb,
+                    ),
+                    timeout=STRUCTURE_CHAIN_CORRECTION_TIMEOUT_S,
+                )
+                accumulators[template_id] = acc
+            except asyncio.TimeoutError:
+                logging.warning(
+                    "chain validate: timed out after %ss for template %s; using uncorrected docs",
+                    STRUCTURE_CHAIN_CORRECTION_TIMEOUT_S,
+                    template_id,
+                )
+            except Exception:
+                logging.exception(
+                    "chain validate: unexpected failure for template %s; using uncorrected docs",
+                    template_id,
+                )
+        info = await merge_compiled_structures(
+            acc,
+            chat_mdl_by_tid[template_id],
+            embedding_model,
+            ctx.tenant_id,
+            ctx.kb_id,
+            compilation_template_id=template_id,
+            cancel_check=lambda: ctx.has_canceled_func(ctx.id),
+        )
+        acc.clear()  # same list object as accumulators[template_id], so the slot restarts empty
+        if isinstance(info, dict):
+            agg = agg_infos[template_id]
+            for k in ("inserted", "updated", "duplicates_dropped"):
+                agg[k] = agg.get(k, 0) + int(info.get(k, 0) or 0)
+
+    progress_cb(msg=f"Start document knowledge compilation ({total} template(s)) ...")
+
+    batch_no = 0
+    async for batch in handler._load_chunks_for_doc(
+        ctx.tenant_id,
+        ctx.kb_id,
+        ctx.doc_id,
+        batch_size=DOC_STRUCTURE_COMPILE_BATCH_CHUNKS,
+    ):
+        batch_no += 1
+        for chunk in batch:
+            cid = chunk.get("id")
+            if isinstance(cid, str) and cid not in chunks_by_id:  # first occurrence of an id wins
+                text = chunk.get("content_with_weight") or ""
+                chunks_by_id[cid] = text if isinstance(text, str) else ""
+        for idx, (template_id, parser_cfg) in enumerate(active_templates):
+            progress_cb(msg=f"  compile batch {batch_no} ({len(batch)} chunks) for template ({idx + 1}/{total})")
+            docs = await compile_structure_from_text(
+                batch,
+                parser_cfg,
+                chat_mdl_by_tid[template_id],
+                embedding_model,
+                ctx.doc_id,
+                language=ctx.language,
+                callback=progress_cb,
+                compilation_template_id=template_id,
+            )
+            if docs:
+                accumulators[template_id].extend(docs)
+            if len(accumulators[template_id]) >= DOC_STRUCTURE_MERGE_MAX_DOCS:
+                progress_cb(msg=f"  merge flush ({len(accumulators[template_id])} docs) for template ({idx + 1}/{total})")
+                await _flush(template_id)
+
+    for idx, (template_id, _parser_cfg) in enumerate(active_templates):
+        if ctx.has_canceled_func(ctx.id):
+            raise TaskCanceledException(f"Task {ctx.id} was cancelled during document knowledge compilation")
+        await _flush(template_id)
+        agg = agg_infos[template_id]
+        ctx.recording_context.record(f"document_structure_compile:{template_id}", agg)
+        progress_cb(msg=f"Document knowledge compilation done ({idx + 1}/{total}): {agg}")
+
+
+async def run_document_post_chunking_if_last(
+    handler,
+    embedding_model: LLMBundle,
+    vector_size: int,
+    task_start_ts: float,
+    chunks_len: int,
+    token_count: int,
+) -> bool:
+    """Gate: only the last chunking task for a doc runs post-processing.
+    Returns ``True`` if the caller may proceed to its own terminal
+    progress update, ``False`` if the task was cancelled.
+
+    The pass runs :func:`run_document_structure_compile` and
+    ``handler._run_raptor`` concurrently and waits for both to finish
+    before restoring the progress callback and clearing the chunking
+    counter; an exception from either one is re-raised after that.
+    """
+    ctx = handler._task_context
+    task_id = ctx.id
+    task_doc_id = ctx.doc_id
+
+    if ctx.has_canceled_func(task_id):
+        abort_doc_chunking_counter(task_doc_id)
+        ctx.progress_cb(-1, msg="Task has been canceled.")
+        return False
+
+    chunking_aborted = is_doc_chunking_aborted(task_doc_id)
+    remaining_chunking_tasks = 0 if ctx.write_interceptor else credit_doc_chunking_task(task_doc_id, task_id)
+    if remaining_chunking_tasks != 0:
+        if chunking_aborted:
+            logging.info(
+                "Chunking for doc %s was aborted before task %s reached post-processing; "
+                "skip document finalizers.",
+                task_doc_id,
+                task_id,
+            )
+        elif remaining_chunking_tasks is not None and remaining_chunking_tasks < 0:
+            logging.warning(
+                "Chunking counter for doc %s is missing or expired after task %s; skip post-processing to avoid duplicate finalizers.",
+                task_doc_id,
+                task_id,
+            )
+        else:
+            logging.info(
+                "Chunk doc(%s), page(%s-%s), chunks(%s), token(%s), elapsed:%.2f; waiting for %s chunking task(s) before post-processing",
+                ctx.name,
+                ctx.from_page,
+                ctx.to_page,
+                chunks_len,
+                token_count,
+                timer() - task_start_ts,
+                remaining_chunking_tasks,
+            )
+        return True
+
+    async def _maybe_run_raptor():
+        raptor_cfg = (ctx.parser_config or {}).get("raptor") or {}
+        if not raptor_cfg.get("use_raptor"):
+            return
+        try:
+            ok_doc, doc_obj = DocumentService.get_by_id(task_doc_id)
+            if ok_doc and doc_obj is not None:
+                ctx.progress_cb(msg="Starting RAPTOR task.")
+                await handler._run_raptor(embedding_model, vector_size, mark_done=False)
+            else:
+                logging.warning("raptor: cannot resolve doc %s to queue per-doc task", task_doc_id)
+        except Exception:
+            logging.exception("raptor: failed to queue per-doc task for doc %s", task_doc_id)
+
+    original_progress_cb = getattr(ctx, "_progress_cb", None)
+    if original_progress_cb is not None:
+        ctx._progress_cb = cap_done_progress(original_progress_cb)
+    try:
+        # return_exceptions=True: with the default, a failure in one
+        # coroutine propagates at once while the other keeps running in
+        # the background, past the cleanup in ``finally`` below.
+        outcomes = await asyncio.gather(
+            run_document_structure_compile(handler, embedding_model),
+            _maybe_run_raptor(),
+            return_exceptions=True,
+        )
+    finally:
+        if original_progress_cb is not None:
+            ctx._progress_cb = original_progress_cb
+        clear_doc_chunking_counter(task_doc_id)
+
+    for outcome in outcomes:
+        if isinstance(outcome, BaseException):
+            raise outcome
+
+    if ctx.has_canceled_func(task_id):
+        abort_doc_chunking_counter(task_doc_id)
+        ctx.progress_cb(-1, msg="Task has been canceled.")
+        return False
+    return True
diff --git a/rag/svr/task_executor_refactor/dataset_skill_generator.py b/rag/svr/task_executor_refactor/dataset_skill_generator.py
new file mode 100644
index 0000000000..c447d1fb03
--- /dev/null
+++ b/rag/svr/task_executor_refactor/dataset_skill_generator.py
@@ -0,0 +1,588 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+"""Corpus → Skill tree generator.
+
+Extracted from ``rag.svr.task_executor_refactor.task_handler`` where the
+same pipeline previously lived as a set of ``_skill_*`` methods and one
+``_corpus2skill`` orchestrator. The public entry point is
+:func:`run_corpus2skill`; the module-level helpers are internal but
+kept accessible so tests can exercise the individual phases.
+
+Design notes:
+
+* The pipeline is per-KB: given every parsed doc in a KB it produces a
+  hierarchical "skill" tree by summarizing each doc, RAPTOR-clustering
+  the summaries, summarizing each cluster, then repeating until the top
+  fan-out is at or below :data:`SKILL_MAX_TOP_CLUSTERS`.
+* Each node lands in ES twice: one per-node row under
+  ``compile_kwd="skill"`` carrying markdown metadata + a leaf-vs-branch
+  contents section, and one aggregate row under
+  ``compile_kwd="skill_all"`` holding the whole recursive tree as JSON
+  for cheap sidebar reads.
+* The layout mirrors Corpus2Skill's ``SKILL.md`` / ``INDEX.md`` naming
+  so a future on-disk export is a straight projection.
+
+The extraction keeps the callable surface minimal:
+
+    run_corpus2skill(ctx, embedding_model, load_chunks_for_doc)
+
+``load_chunks_for_doc`` is injected rather than imported to keep the
+module decoupled from ``TaskHandler``'s streaming chunk loader — any
+async iterator that yields batches of ``{content_with_weight: str, ...}``
+dicts will do.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from typing import AsyncIterator, Callable, Dict, Optional
+
+import numpy as np
+import xxhash
+
+from common import settings
+from common.constants import LLMType
+from common.misc_utils import thread_pool_exec
+from common.token_utils import num_tokens_from_string
+from rag.nlp import search
+from rag.svr.task_executor_refactor.task_context import TaskContext
+
+
+# ----- tunables ------------------------------------------------------
+# Stop folding clusters once we've boiled the KB down to ≤ this many
+# top-level nodes. Mirrors Corpus2Skill's default top-of-tree fan-out.
+SKILL_MAX_TOP_CLUSTERS = 8
+# Per-doc summary is built from a budget of this fraction of the chat
+# model's context window. Stops adding chunks once cumulative tokens
+# hit the cap.
+SKILL_DOC_BUDGET_FRACTION = 0.5
+# Concurrency caps for the two LLM-bound stages.
+SKILL_DOC_SUMMARY_CONCURRENCY = 8
+SKILL_LABEL_CONCURRENCY = 10
+# Defensive cap on the clustering loop — a degenerate clustering that
+# keeps returning N clusters for N inputs would otherwise loop forever.
+SKILL_MAX_TREE_ITERATIONS = 12
+# Page size for the streaming chunk reader used during per-doc
+# summarization.
+SKILL_CHUNK_BATCH = 64
+
+
+# A ``load_chunks_for_doc`` callable takes ``(tenant_id, kb_id, doc_id)``
+# plus a ``batch_size`` keyword; it returns an async iterator of chunk batches.
+ChunkLoader = Callable[
+    [str, str, str],  # positional: tenant_id, kb_id, doc_id
+    AsyncIterator[list[dict]],
+]
+
+
+@dataclass
+class SkillNode:
+    """One node in the corpus → skill hierarchy.
+
+    Leaves wrap a single doc; branch nodes wrap a cluster of child
+    nodes plus the LLM summary of their summaries. ``doc_ids`` is the
+    flattened leaf set under this node, so any branch can quickly
+    report ``num_documents``. ``doc_texts`` carries first-page
+    previews keyed by ``doc_id`` only for leaves whose markdown nav
+    file needs the first-line title — branches inherit it as a no-op
+    union so the dict is non-empty across the tree.
+    """
+
+    level: int  # leaves are created with level 0
+    label: str  # empty on a freshly summarized leaf; assigned later
+    summary: str  # summary text rendered under "## Overview"
+    vec: np.ndarray  # embedding returned alongside the summary
+    doc_ids: list[str]  # ids of the documents under this node
+    doc_texts: dict[str, str] = field(default_factory=dict)  # doc_id -> preview text
+    children: list["SkillNode"] = field(default_factory=list)  # empty for leaves
+    folder_name: str = ""  # used as ``skill_kwd`` and in the ES row id
+
+
+# ----- helpers -------------------------------------------------------
+
+
+def skill_safe_name(text: str, max_len: int = 50) -> str:
+    """Slugify ``text``: lowercase it, drop characters other than a-z,
+    0-9, whitespace and hyphen, turn whitespace runs into one hyphen,
+    trim edge hyphens, then clamp to ``max_len`` characters."""
+    lowered = (text or "").lower().strip()
+    kept = re.sub(r"[^a-z0-9\s-]", "", lowered)
+    hyphenated = re.sub(r"\s+", "-", kept)
+    trimmed = hyphenated.strip("-")
+    return trimmed[:max_len]
+
+
+async def label_skill_node_one(
+    summary: str,
+    chat_mdl,
+    semaphore: asyncio.Semaphore,
+) -> str:
+    """Ask ``chat_mdl`` for one fs-safe label: lowercase, sanitized to
+    [a-z0-9-], at most 50 chars with no leading/trailing hyphen. Falls
+    back to "cluster" when the call fails or nothing survives sanitizing.
+    """
+    async with semaphore:
+        try:
+            cnt = await chat_mdl.async_chat(
+                "You generate short filesystem-safe cluster labels. Reply with the label only.",
+                [
+                    {
+                        "role": "user",
+                        "content": (
+                            "Generate a short (2-5 word) filesystem-safe label for this cluster. "
+                            "Use lowercase(MUST be in the same language as 'Summary'), hyphens instead of spaces. No quotes.\n\n"
+                            f"Summary: {(summary or '')[:500]}"
+                        ),
+                    }
+                ],
+                {"max_tokens": 20, "temperature": 0.0},
+            )
+            raw = (cnt or "").strip().lower()
+            label = re.sub(r"[^a-z0-9-]", "-", raw)
+            label = re.sub(r"-+", "-", label).strip("-")[:50].rstrip("-")  # the 50-char cut can land right after a hyphen
+            return label or "cluster"
+        except Exception:
+            logging.exception("skill: label generation failed; using fallback")
+            return "cluster"
+
+
+async def doc_summary_for_skill(
+    doc_id: str,
+    raptor,
+    chat_mdl,
+    ctx: TaskContext,
+    load_chunks_for_doc: Callable[..., AsyncIterator[list[dict]]],
+) -> Optional["SkillNode"]:
+    """Summarize one document into a leaf :class:`SkillNode`.
+
+    Chunk texts are collected in stream order until the next one would
+    exceed ``SKILL_DOC_BUDGET_FRACTION`` of ``chat_mdl.max_length`` (floor
+    512 tokens; the first usable chunk is always kept), then summarized by
+    ``raptor._summarize_texts``. Returns ``None`` without text or summary.
+    """
+    context_window = int(getattr(chat_mdl, "max_length", 4096) or 4096)
+    token_budget = max(512, int(context_window * SKILL_DOC_BUDGET_FRACTION))
+
+    picked: list[str] = []
+    used_tokens = 0
+    budget_hit = False
+    chunk_stream = load_chunks_for_doc(ctx.tenant_id, ctx.kb_id, doc_id, batch_size=SKILL_CHUNK_BATCH)
+    async for batch in chunk_stream:
+        for chunk in batch:
+            body = chunk.get("content_with_weight") or ""
+            if not (isinstance(body, str) and body):
+                continue
+            cost = num_tokens_from_string(body)
+            if picked and used_tokens + cost > token_budget:
+                budget_hit = True
+                break
+            picked.append(body)
+            used_tokens += cost
+        if budget_hit:
+            break
+
+    if not picked:
+        return None
+
+    summarized = await raptor._summarize_texts(picked, callback=None, task_id="")
+    if summarized is None:
+        return None
+    title, summary_text, vec = summarized
+
+    preview = picked[0][:600]
+    return SkillNode(
+        level=0,
+        label="",  # filled in phase 5
+        summary=summary_text or title,
+        vec=np.asarray(vec),
+        doc_ids=[doc_id],
+        doc_texts={doc_id: preview},
+        children=[],
+        folder_name="",  # filled in phase 5
+    )
+
+
+def build_skill_md(node: "SkillNode") -> str:
+    """SKILL.md (depth 0) / INDEX.md (deeper) text for ``node``: YAML
+    frontmatter (name, folded description, level, num_documents), a
+    ``## Overview`` section with the full summary, and a ``## Contents``
+    section listing sub-groups for branches or
+    ``- `doc_id`: <first line, max 120 chars>`` entries for leaves.
+    """
+    level = node.level
+    title = node.folder_name or node.label or f"cluster-{level}"
+    blurb = (node.summary or "")[:300].replace("\n", " ").strip()
+    overview = (node.summary or "").strip() or "(no summary)"
+    header = [
+        "---",
+        f"name: {title}",
+        "description: >",
+        f"  {blurb}",
+        f"level: {level}",
+        f"num_documents: {len(node.doc_ids)}",
+        "---",
+        "",
+        "## Overview",
+        "",
+        overview,
+        "",
+        "## Contents",
+        "",
+    ]
+    def _child_entry(child: "SkillNode") -> str:
+        child_name = child.folder_name or child.label or "cluster"
+        snippet = (child.summary or "")[:200].replace("\n", " ").strip()
+        return f"- **{child_name}/** ({len(child.doc_ids)} docs): {snippet}"
+
+    def _doc_entry(doc_id: str) -> str:
+        preview = node.doc_texts.get(doc_id, "") or ""
+        first_line = preview.partition("\n")[0].strip()[:120]
+        return f"- `{doc_id}`: {first_line}"
+
+    if node.children:
+        heading = "### Sub-groups (directories)"
+        entries = [_child_entry(child) for child in node.children]
+    else:
+        heading = f"### Documents ({len(node.doc_ids)} items)"
+        entries = [_doc_entry(doc_id) for doc_id in node.doc_ids]
+    return "\n".join([*header, heading, "", *entries, ""])
+
+
+def skill_node_es_row(ctx: TaskContext, node: "SkillNode") -> Dict:
+    """Assemble the per-node ES row. The id is an xxh64 digest of
+    ``skill:<kb_id>:<folder_name>``, so the same node maps to the same
+    row id on every run."""
+    kb = str(ctx.kb_id)
+    id_seed = f"skill:{kb}:{node.folder_name}".encode("utf-8", "surrogatepass")
+    child_folders = [child.folder_name for child in node.children]
+    row: Dict = {"id": xxhash.xxh64(id_seed).hexdigest(), "kb_id": kb}
+    row["doc_id"] = kb  # KB-scoped sentinel
+    row.update(
+        compile_kwd="skill",
+        skill_kwd=node.folder_name,
+        depth_int=int(node.level),
+        children_kwd=child_folders,
+        source_doc_ids=list(node.doc_ids),
+        md_with_weight=build_skill_md(node),
+        available_int=1,
+    )
+    return row
+
+
+def skill_tree_md_snippet(node: "SkillNode") -> str:
+    """Return the part of the node's markdown that precedes ``## Overview``.
+
+    That leading frontmatter is enough for the one-shot tree browser to
+    render the skill directory without loading every full node body.
+    """
+    head, _sep, _body = build_skill_md(node).partition("\n## Overview")
+    return head.strip()
+
+
+def skill_tree_node(node: "SkillNode") -> Dict:
+    """Recursively project ``node`` onto the nested dict kept in the aggregate row."""
+    nested = [skill_tree_node(child) for child in node.children]
+    snippet = skill_tree_md_snippet(node)
+    record = {"skill_kwd": node.folder_name, "md_with_weight": snippet}
+    return {**record, "children_kwd": nested}
+
+
+def skill_all_es_row(ctx: TaskContext, roots: list["SkillNode"]) -> Dict:
+    """Assemble the single aggregate row stored under
+    ``compile_kwd="skill_all"``: the whole forest serialized as JSON in
+    ``skill_with_weight``, under an id derived from the KB id alone.
+    """
+    kb = str(ctx.kb_id)
+    id_seed = f"skill_all:{kb}".encode("utf-8", "surrogatepass")
+    row_id = xxhash.xxh64(id_seed).hexdigest()
+    forest = [skill_tree_node(root) for root in roots]
+    tree_json = json.dumps(forest, ensure_ascii=False, indent=2)
+    return {
+        "id": row_id,
+        "kb_id": kb,
+        "doc_id": kb,
+        "compile_kwd": "skill_all",
+        "skill_with_weight": tree_json,
+        "available_int": 1,
+    }
+
+
+# ----- main entry ----------------------------------------------------
+
+
+async def run_corpus2skill(
+    ctx: TaskContext,
+    embedding_model,
+    load_chunks_for_doc: Callable[..., AsyncIterator[list[dict]]],
+) -> None:
+    """Build a hierarchical skill tree for the current KB and persist
+    one ES row per node under ``compile_kwd="skill"`` plus a full
+    recursive aggregate row under ``compile_kwd="skill_all"``.
+
+    Always-rebuild semantics for v1: every parsed doc in the KB is
+    re-summarized on each call. (Incremental "only changed docs" is a
+    TODO — needs a per-doc content-hash similar to MAP's
+    ``chunk_hash_kwd``.)
+    """
+    # Local imports so the module doesn't drag in the API service layer
+    # at import time — that's a source of circular-import risk given how
+    # much lives under ``api.db.services``.
+    from api.db.services.document_service import DocumentService
+    from api.db.services.llm_service import LLMBundle
+    from api.db.joint_services.tenant_model_service import (
+        get_tenant_default_model_by_type,
+    )
+    from rag.advanced_rag.knowlege_compile.raptor import (
+        RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor,
+    )
+
+    progress = ctx.progress_cb
+    progress(0.0, "skill: loading documents")
+
+    # ---- Phase 0: chat model + RAPTOR instance for summarization/clustering.
+    chat_model_config = get_tenant_default_model_by_type(ctx.tenant_id, LLMType.CHAT)
+    chat_mdl = LLMBundle(ctx.tenant_id, chat_model_config, lang=ctx.language)
+
+    raptor = Raptor(
+        max_cluster=128,
+        llm_model=chat_mdl,
+        embd_model=embedding_model,
+        prompt="Please write a concise summary of the following texts:\n{cluster_content}",
+        max_token=256,
+        threshold=0.1,
+        max_errors=3,
+    )
+
+    # ---- Phase 1: per-doc summaries.
+    all_docs, _ = await thread_pool_exec(
+        DocumentService.get_by_kb_id,
+        kb_id=ctx.kb_id,
+        page_number=0,
+        items_per_page=0,
+        orderby="create_time",
+        desc=False,
+        keywords="",
+        run_status=[],
+        types=[],
+        suffix=[],
+    )
+    eligible_docs = [d for d in (all_docs or []) if d.get("id")]
+    if not eligible_docs:
+        progress(1.0, "skill: no documents in KB")
+        return
+
+    # Phase-1 gate: bail before spinning up N per-doc summarizations.
+    if ctx.has_canceled_func(ctx.id):
+        progress(-1, "skill: task has been canceled")
+        return
+
+    n_docs = len(eligible_docs)
+    progress(0.05, f"skill: summarizing {n_docs} document(s)")
+    doc_sem = asyncio.Semaphore(SKILL_DOC_SUMMARY_CONCURRENCY)
+
+    async def _summarize_doc(d: Dict) -> Optional[SkillNode]:
+        async with doc_sem:
+            try:
+                return await doc_summary_for_skill(
+                    d["id"],
+                    raptor,
+                    chat_mdl,
+                    ctx,
+                    load_chunks_for_doc,
+                )
+            except Exception:
+                logging.exception(
+                    "skill: doc summary failed for doc=%s",
+                    d.get("id"),
+                )
+                return None
+
+    leaf_results = await asyncio.gather(
+        *(_summarize_doc(d) for d in eligible_docs),
+        return_exceptions=False,
+    )
+    leaves: list[SkillNode] = [n for n in leaf_results if n is not None]
+    if not leaves:
+        progress(1.0, "skill: no doc summaries produced")
+        return
+
+    # Post-Phase-1 gate: bail before starting the iterative clustering.
+    if ctx.has_canceled_func(ctx.id):
+        progress(-1, "skill: task has been canceled")
+        return
+
+    # ---- Phase 2-4: iterative clustering until ≤ MAX_TOP.
+    current_layer = leaves
+    level = 0
+    for iteration in range(SKILL_MAX_TREE_ITERATIONS):
+        # Per-iteration gate: caps the wasted LLM cost when the task is
+        # canceled mid-way through a many-layer clustering run.
+        if ctx.has_canceled_func(ctx.id):
+            progress(-1, "skill: task has been canceled")
+            return
+        if len(current_layer) <= SKILL_MAX_TOP_CLUSTERS:
+            break
+        progress(
+            0.3 + 0.4 * iteration / SKILL_MAX_TREE_ITERATIONS,
+            f"skill: clustering layer {level} ({len(current_layer)} nodes)",
+        )
+        try:
+            embeddings = np.asarray([n.vec for n in current_layer])
+            n_clusters, labels = raptor.clustering(
+                embeddings,
+                random_state=0,
+                task_id="",
+            )
+        except Exception:
+            logging.exception("skill: clustering failed at level %d", level)
+            break
+        if n_clusters <= 0 or n_clusters >= len(current_layer):
+            # No reduction → stop to avoid an infinite loop.
+            logging.warning(
+                "skill: clustering did not reduce node count (%d → %d); stopping",
+                len(current_layer),
+                n_clusters,
+            )
+            break
+
+        cluster_buckets: dict[int, list[SkillNode]] = {}
+        for idx, lbl in enumerate(labels):
+            cluster_buckets.setdefault(int(lbl), []).append(current_layer[idx])
+
+        async def _summarize_cluster(children: list[SkillNode]) -> Optional[SkillNode]:
+            texts = [c.summary for c in children if c.summary]
+            if not texts:
+                return None
+            try:
+                res = await raptor._summarize_texts(texts, callback=None, task_id="")
+            except Exception:
+                logging.exception("skill: cluster summary failed")
+                return None
+            if res is None:
+                return None
+            title, summary_text, vec = res
+            merged_doc_ids: list[str] = []
+            seen: set[str] = set()
+            merged_doc_texts: dict[str, str] = {}
+            for c in children:
+                for did in c.doc_ids:
+                    if did and did not in seen:
+                        seen.add(did)
+                        merged_doc_ids.append(did)
+                merged_doc_texts.update(c.doc_texts)
+            return SkillNode(
+                level=level + 1,
+                label="",
+                summary=summary_text or title,
+                vec=np.asarray(vec),
+                doc_ids=merged_doc_ids,
+                doc_texts=merged_doc_texts,
+                children=list(children),
+                folder_name="",
+            )
+
+        parent_results = await asyncio.gather(
+            *(_summarize_cluster(children) for children in cluster_buckets.values()),
+            return_exceptions=False,
+        )
+        next_layer = [p for p in parent_results if p is not None]
+        if not next_layer:
+            logging.warning("skill: no cluster summaries produced at level %d", level)
+            break
+        current_layer = next_layer
+        level += 1
+
+    roots: list[SkillNode] = current_layer
+
+    # ---- Phase 5: label every node (concurrent), then assign folders.
+    all_nodes: list[SkillNode] = []
+
+    def _collect(node: SkillNode) -> None:
+        all_nodes.append(node)
+        for c in node.children:
+            _collect(c)
+
+    for r in roots:
+        _collect(r)
+
+    progress(0.75, f"skill: labelling {len(all_nodes)} cluster node(s)")
+    label_sem = asyncio.Semaphore(SKILL_LABEL_CONCURRENCY)
+    labels_out = await asyncio.gather(
+        *(label_skill_node_one(n.summary, chat_mdl, label_sem) for n in all_nodes),
+        return_exceptions=False,
+    )
+    for n, lbl in zip(all_nodes, labels_out):
+        n.label = lbl or "cluster"
+
+    # Folder naming: roots get ``skill-NN-<label>``; deeper nodes get
+    # ``group-NN-<label>`` keyed by their position in the parent's
+    # children list (mirrors Corpus2Skill's skill_builder).
+    def _assign_folders(node: SkillNode, idx: int, is_root: bool) -> None:
+        slug = skill_safe_name(node.label)
+        prefix = f"skill-{idx:02d}" if is_root else f"group-{idx:02d}"
+        node.folder_name = f"{prefix}-{slug}" if slug else prefix
+        for ci, child in enumerate(node.children):
+            _assign_folders(child, ci, is_root=False)
+
+    for ri, root in enumerate(roots):
+        _assign_folders(root, ri, is_root=True)
+
+    # Final gate before the destructive delete + bulk insert. This is
+    # the most important check — without it a late cancel would still
+    # wipe the KB's existing ``skill``/``skill_all`` rows AND spend a
+    # full bulk-insert round-trip on data the caller no longer wants.
+    if ctx.has_canceled_func(ctx.id):
+        progress(-1, "skill: task has been canceled")
+        return
+
+    # ---- Phase 6: clean + bulk insert.
+    index = search.index_name(ctx.tenant_id)
+    try:
+        await thread_pool_exec(
+            settings.docStoreConn.delete,
+            {"compile_kwd": ["skill", "skill_all"]},
+            index,
+            ctx.kb_id,
+        )
+    except Exception:
+        logging.debug("skill: prior delete failed; relying on id-upsert")
+
+    rows = [skill_node_es_row(ctx, n) for n in all_nodes]
+    rows.append(skill_all_es_row(ctx, roots))
+    if not rows:
+        progress(1.0, "skill: nothing to persist")
+        return
+
+    try:
+        await thread_pool_exec(
+            settings.docStoreConn.insert,
+            rows,
+            index,
+            ctx.kb_id,
+        )
+    except Exception:
+        logging.exception("skill: bulk insert failed (rows=%d)", len(rows))
+        return
+
+    progress(
+        1.0,
+        f"skill: built {len(roots)} top-level skill(s), {len(all_nodes)} total node(s), {len(leaves)} doc(s)",
+    )
diff --git a/rag/svr/task_executor_refactor/dataset_wiki_generator.py b/rag/svr/task_executor_refactor/dataset_wiki_generator.py
new file mode 100644
index 0000000000..a34fe84d0d
--- /dev/null
+++ b/rag/svr/task_executor_refactor/dataset_wiki_generator.py
@@ -0,0 +1,805 @@
+#
+#  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+"""KB-wide wiki / artifact compilation.
+
+Extracted from ``rag.svr.task_executor_refactor.task_handler`` where the
+same pipeline previously lived as a set of ``_wiki_*`` / ``_persist_wiki_*``
+methods and one ``_run_wiki`` orchestrator. The public entry point is
+:func:`run_wiki`.
+
+The pipeline runs MAP per (doc, template) — each MAP call resumes from
+its own ``artifact_map_extract`` ES rows — then REDUCE / PLAN / REFINE
+KB-wide via ``rag.advanced_rag.knowlege_compile.wiki``. Refined pages
+land in ES twice: once as searchable ``artifact_page`` rows and once as
+``artifact_entity`` / ``artifact_relation`` rows for the dataset
+Artifact tab's canvas graph.
+
+Design notes:
+
+* ``load_chunks_for_doc`` is injected rather than imported to keep the
+  module decoupled from ``TaskHandler``'s streaming chunk loader.
+* The eligibility loop resolves each doc's
+  ``parser_config.compilation_template_group_id`` to a template list
+  via ``CompilationTemplateGroupService.resolve_template_ids``. That
+  helper is duplicated as a small private function here so the module
+  stays free of a task_handler import (which would be circular).
+* The persistence helpers (``persist_wiki_pages_to_es`` etc.) are
+  exposed at module level for testing but are only called from
+  :func:`run_wiki` in production.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+from typing import AsyncIterator, Callable, Dict, List, Optional
+
+import xxhash
+
+from common import settings
+from common.constants import LLMType
+from common.misc_utils import thread_pool_exec
+from rag.nlp import search
+from rag.advanced_rag.knowlege_compile.wiki import (
+    wiki_map_from_chunks,
+    wiki_plan_from_reduction,
+    wiki_reduce_from_extracts,
+    wiki_refine_from_plan,
+)
+from rag.svr.task_executor_refactor.task_context import TaskContext
+
+
+# ----- tunables ------------------------------------------------------
+# Artifact-MAP tuning: how many chunks to feed per ``wiki_map_from_chunks``
+# invocation. The function does its own per-call resume-set load + ES
+# persist, so smaller batches mean more (small) ES round-trips but a flat
+# memory footprint. 64 keeps the resume-set re-reads cheap while leaving
+# room for the function's internal split_chunks packing to do real work.
+WIKI_MAP_BATCH_CHUNKS = 64
+
+# Per-node cap on ``source_chunk_ids`` carried by each ``artifact_entity``
+# graph row. Pages can accumulate hundreds of source chunks; the graph is
+# meant for fast canvas rendering, not full provenance audit, so we trim
+# each node's list. The full per-page list is still available on the
+# ``artifact_page`` row the UI deep-links into.
+WIKI_GRAPH_MAX_CHUNK_IDS_PER_NODE = 64
+
+# Title + comments stamped on every ``artifact_commit`` row produced by
+# :func:`run_wiki`'s regeneration path (the dialog-edit path carries the
+# user's own title/comments). ``{action}`` is the page's upper-cased ``action``.
+WIKI_REGEN_COMMIT_TITLE = "Regenerated by artifact compilation"
+WIKI_REGEN_COMMIT_COMMENTS_TEMPLATE = "Auto-update via run_wiki (action={action})"
+
+
+# ----- helpers -------------------------------------------------------
+
+
+def _parser_config_compilation_template_group_id(parser_config) -> str:
+    """Return the compilation template-group id stored in ``parser_config``.
+
+    The top-level ``compilation_template_group_id`` key is consulted first,
+    then the same key nested under ``ext``. Only non-blank strings count;
+    the result is stripped, and ``""`` is returned when nothing usable is
+    found or ``parser_config`` is not a dict. Kept local to this module
+    rather than imported from ``task_handler`` to avoid a circular import.
+    """
+    if not isinstance(parser_config, dict):
+        return ""
+    nested = parser_config.get("ext")
+    for holder in (parser_config, nested if isinstance(nested, dict) else {}):
+        candidate = holder.get("compilation_template_group_id")
+        cleaned = candidate.strip() if isinstance(candidate, str) else ""
+        if cleaned:
+            return cleaned
+    return ""
+
+
+def _parser_config_compilation_template_ids(parser_config, tenant_id: str) -> list[str]:
+    """Map a doc's ``parser_config`` to the template ids of its configured
+    compilation template group. Gives ``[]`` when no group id is set;
+    otherwise whatever ``resolve_template_ids`` returns is passed through."""
+    from api.db.services.compilation_template_group_service import (
+        CompilationTemplateGroupService,
+    )
+
+    configured_group = _parser_config_compilation_template_group_id(parser_config)
+    if configured_group:
+        return CompilationTemplateGroupService.resolve_template_ids(configured_group, tenant_id)
+    return []
+
+
+# ----- persistence ---------------------------------------------------
+
+
+async def persist_wiki_pages_to_es(
+    ctx: TaskContext,
+    pages: List[Dict],
+    embd_mdl,
+) -> None:
+    """Write one ``artifact_page`` ES row per page, then log edit commits.
+
+    Row layout (knowledge-compilation schema):
+
+      id                  xxh64(kb_id + ":" + slug)
+      compile_kwd         "artifact_page"
+      slug_kwd            page.slug (surrounding whitespace stripped)
+      title_kwd           page.title, falling back to the slug
+      page_type_kwd       page.page_type, default "concept"
+      entity_names_kwd    page.entity_names
+      outlinks_kwd        page.outlinks (``outlinks_int`` is its length)
+      related_kb_pages_kwd page.related_kb_pages
+      source_chunk_ids    page.source_chunk_ids
+      source_doc_ids      page.source_doc_ids
+      kb_id, doc_id       ctx.kb_id (``doc_id`` is only a sentinel)
+      content_with_weight rendered markdown
+      summary_with_weight page.summary
+      content_ltks /
+      content_sm_ltks     tokenize(content_md + summary)
+      q_<dim>_vec         embed(summary)
+
+    ``action`` is not stored on the row — it's a planner artifact with
+    no meaning post-write. Pages without a string slug are skipped, and
+    embedding / insert failures are logged and end the call without raising.
+    """
+    if not pages:
+        return
+
+    from rag.nlp import rag_tokenizer
+    from common.doc_store.doc_store_base import OrderByExpr
+    from api.db.services.file_commit_service import (
+        FileCommitService as WikiCommitService,
+    )
+
+    index = search.index_name(ctx.tenant_id)
+    kb_id_str = str(ctx.kb_id)
+
+    # One slug normalizer for the prior lookup, row builder and audit loop.
+    def _slug_of(page: Dict) -> str:
+        raw = page.get("slug")
+        return raw.strip() if isinstance(raw, str) else ""
+
+    # Capture the prior rendered content for every slug we're about to
+    # overwrite, so the per-page commit row downstream has a real diff
+    # baseline. Single batch read by slug_kwd IN [...] — one round-trip
+    # regardless of page count. Failures here degrade gracefully.
+    target_slugs: list[str] = [s for s in map(_slug_of, pages) if s]
+    prior_by_slug: dict[str, str] = {}
+    if target_slugs:
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                ["id", "slug_kwd", "content_with_weight"],
+                [],
+                {"compile_kwd": ["artifact_page"], "slug_kwd": list(target_slugs)},
+                [],
+                OrderByExpr(),
+                0,
+                max(len(target_slugs), 1),
+                index,
+                [ctx.kb_id],
+            )
+            field_map = settings.docStoreConn.get_fields(
+                res,
+                ["id", "slug_kwd", "content_with_weight"],
+            )
+            for row in (field_map or {}).values():
+                s = row.get("slug_kwd")
+                c = row.get("content_with_weight")
+                if isinstance(s, str) and isinstance(c, str):
+                    prior_by_slug[s] = c
+        except Exception:
+            logging.exception(
+                "wiki_persist: prior-content read failed for kb=%s; commit audit will treat all pages as creations",
+                kb_id_str,
+            )
+
+    # Batch the summary embeddings in one model call. Empty strings are
+    # swapped for a single space so the encoder doesn't reject them —
+    # they still yield a vector but contribute nothing meaningful.
+    summaries = [(p.get("summary") or "").strip() for p in pages]
+    embed_inputs = [s if s else " " for s in summaries]
+    try:
+        embeddings, _ = await thread_pool_exec(embd_mdl.encode, embed_inputs)
+    except Exception:
+        logging.exception("wiki_persist: summary embedding batch failed for kb=%s", kb_id_str)
+        return
+    try:
+        n_emb = len(embeddings) if embeddings is not None else 0
+    except TypeError:
+        n_emb = 0
+    if n_emb != len(pages):
+        logging.warning(
+            "artifact_persist: embedding count %d != pages %d for kb=%s; aborting",
+            n_emb,
+            len(pages),
+            kb_id_str,
+        )
+        return
+
+    rows: List[Dict] = []
+    for page, vec in zip(pages, embeddings):
+        slug = _slug_of(page)
+        if not slug:
+            continue
+        title = page.get("title") or slug
+        summary = page.get("summary") or ""
+        content_md = page.get("content_md_rendered") or page.get("content_md") or page.get("content_md_raw") or ""
+
+        vec_list = vec.tolist() if hasattr(vec, "tolist") else list(vec)
+        if not vec_list:
+            logging.warning("artifact_persist: empty embedding for slug=%s; skipping", slug)
+            continue
+
+        text_for_search = (content_md + "\n\n" + summary).strip()
+        content_ltks = rag_tokenizer.tokenize(text_for_search) if text_for_search else ""
+        content_sm_ltks = rag_tokenizer.fine_grained_tokenize(content_ltks) if content_ltks else ""
+
+        row_id = xxhash.xxh64(f"{kb_id_str}:{slug}".encode("utf-8", "surrogatepass")).hexdigest()
+
+        rows.append(
+            {
+                "id": row_id,
+                "kb_id": kb_id_str,
+                "doc_id": kb_id_str,  # sentinel; KB-scoped row, real provenance in source_doc_ids
+                "compile_kwd": "artifact_page",
+                "slug_kwd": slug,
+                "title_kwd": title,
+                "page_type_kwd": page.get("page_type") or "concept",
+                "entity_names_kwd": list(page.get("entity_names") or []),
+                "outlinks_kwd": list(page.get("outlinks") or []),
+                "outlinks_int": len(list(page.get("outlinks") or [])),
+                "related_kb_pages_kwd": list(page.get("related_kb_pages") or []),
+                "source_chunk_ids": list(page.get("source_chunk_ids") or []),
+                "source_doc_ids": list(page.get("source_doc_ids") or []),
+                "content_with_weight": content_md,
+                # Summary kept verbatim alongside the rendered body so the
+                # viewer can render it as a distinct (smaller) block above
+                # the main content.
+                "summary_with_weight": summary,
+                "content_ltks": content_ltks,
+                "content_sm_ltks": content_sm_ltks,
+                f"q_{len(vec_list)}_vec": vec_list,
+                "available_int": 1,
+            }
+        )
+
+    if not rows:
+        return
+
+    try:
+        await thread_pool_exec(settings.docStoreConn.insert, rows, index, ctx.kb_id)
+    except Exception:
+        logging.exception(
+            "wiki_persist: bulk insert failed for kb=%s (rows=%d)",
+            kb_id_str,
+            len(rows),
+        )
+        return
+
+    # Audit trail: one commit row per page that already had stored content;
+    # pages with no prior content are skipped by the guard below, so a failed
+    # prior-content read yields no commits (NOTE(review): its log text says
+    # otherwise). Best-effort — commit failures log but don't fail the compile.
+    for page in pages:
+        slug = _slug_of(page)
+        if not slug:
+            continue
+        if not prior_by_slug.get(slug, ""):
+            continue
+        content_md = page.get("content_md_rendered") or page.get("content_md") or page.get("content_md_raw") or ""
+        action = (page.get("action") or "CREATE").upper()
+        try:
+            WikiCommitService.record_page_edit(
+                tenant_id=ctx.tenant_id,
+                kb_id=ctx.kb_id,
+                page_type=page.get("page_type") or "concept",
+                slug=slug,
+                content_before=prior_by_slug.get(slug, ""),
+                content_after=content_md,
+                title=WIKI_REGEN_COMMIT_TITLE,
+                comments=WIKI_REGEN_COMMIT_COMMENTS_TEMPLATE.format(action=action),
+                user_id=None,  # system commit — no human author
+            )
+        except Exception:
+            logging.exception(
+                "wiki_persist: commit record failed for kb=%s slug=%s",
+                kb_id_str,
+                slug,
+            )
+
+
+def build_wiki_page_graph(
+    pages: List[Dict],
+    kb_id: str,
+) -> tuple[List[Dict], List[Dict]]:
+    """Project the REFINE-emitted page list onto per-entity and
+    per-relation ES rows.
+
+    Pages without a non-blank ``slug`` are ignored. ``kb_id`` is mixed
+    into every row id and stamped on ``kb_id`` / ``doc_id``.
+
+    Returns:
+        (entity_rows, relation_rows)
+
+    Both lists are ES-ready docs (one per node / per surviving edge)
+    using the standard artifact envelope. They are BM25-only (no
+    ``q_<dim>_vec``) — entities carry ``content_ltks`` derived from
+    ``slug + " " + summary`` so name/summary lookups hit the lexical
+    index.
+
+    An edge survives only if its target is another page's slug (dangling
+    and self links are dropped); a repeated (from, to) pair is emitted once.
+    """
+    from rag.nlp import rag_tokenizer
+
+    known_slugs: set[str] = set()
+    entity_rows: List[Dict] = []
+    for p in pages or []:
+        slug = (p.get("slug") or "").strip()
+        if not slug:
+            continue
+        outlinks_raw = p.get("outlinks") or []
+        # ``weight`` is the page's outlink count. Drives node size on
+        # the canvas. Computed on the raw outlink list before dangling-
+        # target filtering so visual weight reflects what the writer
+        # actually emitted.
+        weight = len(outlinks_raw) if isinstance(outlinks_raw, list) else 0
+
+        # Per-node provenance: union of source chunks REFINE attributed
+        # to this page. Dedup preserves first-seen order; cap the list
+        # to keep the graph rows small.
+        raw_chunk_ids = p.get("source_chunk_ids") or []
+        seen_chunk_ids: dict[str, None] = {}
+        for cid in raw_chunk_ids:
+            if isinstance(cid, str) and cid and cid not in seen_chunk_ids:
+                seen_chunk_ids[cid] = None
+                if len(seen_chunk_ids) >= WIKI_GRAPH_MAX_CHUNK_IDS_PER_NODE:
+                    break
+        capped_chunk_ids = list(seen_chunk_ids.keys())
+
+        page_type = p.get("page_type") or "concept"
+        description = p.get("summary") or ""
+        name = p.get("title") or slug
+        aliases = list(p.get("entity_names") or [])
+
+        known_slugs.add(slug)
+
+        # Per-entity ES row. content_ltks is built from slug + summary
+        # so BM25 hits both the deep-link key and human prose.
+        content_text = (slug + " " + description).strip()
+        entity_payload = {
+            "slug": slug,
+            "name": name,
+            "aliases": aliases,
+            "description": description,
+            "type": page_type,
+            "weight": weight,
+        }
+        entity_rows.append(
+            {
+                "id": xxhash.xxh64(
+                    f"artifact_entity:{kb_id}:{slug}".encode("utf-8", "surrogatepass"),
+                ).hexdigest(),
+                "kb_id": kb_id,
+                "doc_id": kb_id,  # KB-scoped sentinel
+                "available_int": 1,
+                "compile_kwd": "artifact_entity",
+                "type_kwd": "artifact_" + page_type,
+                "slug_kwd": slug,
+                "weight_int": int(weight),
+                "source_chunk_ids": capped_chunk_ids,
+                "content_ltks": rag_tokenizer.tokenize(content_text) if content_text else "",
+                "content_with_weight": json.dumps(entity_payload, ensure_ascii=False),
+            }
+        )
+
+    relation_rows: List[Dict] = []
+    seen_edges: set[tuple[str, str]] = set()
+    for p in pages or []:
+        src = (p.get("slug") or "").strip()
+        if not src or src not in known_slugs:
+            continue
+        for raw_target in p.get("outlinks") or []:
+            if isinstance(raw_target, str):
+                tgt = raw_target.strip()
+            elif isinstance(raw_target, dict):
+                tgt = str(raw_target.get("slug") or "").strip()
+            else:
+                tgt = ""
+            if not tgt or tgt == src or tgt not in known_slugs:
+                continue
+            # Row ids derive from (src, tgt); a repeat would collide on id.
+            if (src, tgt) in seen_edges:
+                continue
+            seen_edges.add((src, tgt))
+            relation_payload = {"from": src, "to": tgt}
+            relation_rows.append(
+                {
+                    "id": xxhash.xxh64(
+                        f"artifact_relation:{kb_id}:{src}:{tgt}".encode("utf-8", "surrogatepass"),
+                    ).hexdigest(),
+                    "kb_id": kb_id,
+                    "doc_id": kb_id,
+                    "available_int": 1,
+                    "compile_kwd": "artifact_relation",
+                    "type_kwd": "artifact_relation",
+                    "from_kwd": src,
+                    "to_kwd": tgt,
+                    "content_with_weight": json.dumps(relation_payload, ensure_ascii=False),
+                }
+            )
+
+    return entity_rows, relation_rows
+
+
+async def persist_wiki_page_graph_to_es(
+    ctx: TaskContext,
+    pages: List[Dict],
+) -> None:
+    """Rebuild the KB's artifact graph rows in the doc store.
+
+    The rows come from :func:`build_wiki_page_graph`. Two buckets are
+    refreshed, each by a delete on its ``compile_kwd`` followed by an
+    insert of the new rows (skipped when there are none):
+
+    1. ``compile_kwd="artifact_entity"`` — the per-page node rows.
+    2. ``compile_kwd="artifact_relation"`` — the per-edge rows.
+
+    A third delete removes rows tagged ``artifact_page_graph`` (the
+    legacy blob). The three operations run concurrently; delete
+    failures are logged at debug level, insert failures with a
+    traceback, and exceptions from them are not propagated.
+    """
+    kb_label = str(ctx.kb_id)
+    node_rows, edge_rows = build_wiki_page_graph(pages or [], kb_label)
+
+    index_name = search.index_name(ctx.tenant_id)
+
+    async def _drop_legacy_blob() -> None:
+        # Best-effort removal of rows tagged with the legacy graph-blob kind.
+        legacy_filter = {"compile_kwd": "artifact_page_graph"}
+        try:
+            await thread_pool_exec(
+                settings.docStoreConn.delete,
+                legacy_filter,
+                index_name,
+                ctx.kb_id,
+            )
+        except Exception:
+            logging.debug(
+                "artifact_page_graph: legacy blob sweep failed for kb=%s",
+                kb_label,
+            )
+
+    async def _refresh_bucket(bucket: str, fresh_rows: List[Dict]) -> None:
+        # Clear the bucket first; a failed delete is only noted at debug
+        # level and the insert below is still attempted.
+        try:
+            await thread_pool_exec(
+                settings.docStoreConn.delete,
+                {"compile_kwd": bucket},
+                index_name,
+                ctx.kb_id,
+            )
+        except Exception:
+            logging.debug("%s: prior delete failed; relying on id-upsert", bucket)
+        if fresh_rows:
+            try:
+                await thread_pool_exec(
+                    settings.docStoreConn.insert,
+                    fresh_rows,
+                    index_name,
+                    ctx.kb_id,
+                )
+            except Exception:
+                logging.exception(
+                    "%s: insert failed for kb=%s (%d rows)",
+                    bucket,
+                    kb_label,
+                    len(fresh_rows),
+                )
+
+    await asyncio.gather(
+        _refresh_bucket("artifact_entity", node_rows),
+        _refresh_bucket("artifact_relation", edge_rows),
+        _drop_legacy_blob(),
+    )
+
+
+# ----- main entry ----------------------------------------------------
+
+
+async def run_wiki(
+    ctx: TaskContext,
+    embedding_model,
+    load_chunks_for_doc: Callable[..., AsyncIterator[list[dict]]],
+) -> None:
+    """KB-wide artifact compilation task.
+
+    Runs after the user clicks the "Artifact" button in the dataset
+    generate menu. Iterates every doc in the KB whose parser_config
+    has a compilation template group resolving to an artifacts-kind
+    child, runs MAP per-doc (which uses ES-stored resume rows to skip
+    chunks already processed in a previous run), then runs REDUCE /
+    PLAN / REFINE KB-wide and persists pages.
+
+    Batching: each MAP call uses ``batch_size_cap=8`` and
+    ``window_fraction=0.5`` — i.e. roll over to a new batch when the
+    current batch reaches 8 chunks OR its accumulated token count
+    exceeds 50% of the chat model's ``max_length``.
+    """
+    # Local imports so this module doesn't drag in the API service
+    # layer at import time — that's a source of circular-import risk
+    # given how much lives under ``api.db.services``.
+    from api.db.services.document_service import DocumentService
+    from api.db.services.knowledgebase_service import KnowledgebaseService
+    from api.db.services.compilation_template_service import CompilationTemplateService
+    from api.db.services.llm_service import LLMBundle
+    from api.db.joint_services.tenant_model_service import (
+        get_tenant_default_model_by_type,
+        get_model_config_from_provider_instance,
+    )
+    from api.apps.restful_apis.chunk_api import _compilation_template_kind
+
+    progress = ctx.progress_cb
+    progress(0.0, "Loading documents for wiki compilation...")
+
+    # 1. Resolve KB metadata for PLAN.
+    ok, kb = KnowledgebaseService.get_by_id(ctx.kb_id)
+    if not ok:
+        progress(-1, f"KB {ctx.kb_id} not found.")
+        return
+    kb_name = kb.name
+    kb_description = kb.description
+
+    # 2. Pick docs eligible for artifact compilation (those whose
+    # configured template group resolves to at least one artifacts-kind
+    # child). The frontend Artifact button targets the KB, but the
+    # per-doc opt-in is what gates inclusion.
+    all_docs, _ = await thread_pool_exec(
+        DocumentService.get_by_kb_id,
+        kb_id=ctx.kb_id,
+        page_number=0,
+        items_per_page=0,
+        orderby="create_time",
+        desc=False,
+        keywords="",
+        run_status=[],
+        types=[],
+        suffix=[],
+    )
+    eligible = []
+    for d in all_docs or []:
+        pc = d.get("parser_config") or {}
+        for template_id in _parser_config_compilation_template_ids(pc, ctx.tenant_id):
+            template = CompilationTemplateService.get_saved(template_id, ctx.tenant_id)
+            config = template.get("config") if template else {}
+            kind = _compilation_template_kind(config.get("kind") if isinstance(config, dict) else "")
+            if kind == "artifacts":
+                eligible.append((d, template_id))
+                break
+    if not eligible:
+        progress(1.0, "No documents are configured for wiki compilation.")
+        return
+
+    # 3. Resolve chat models. MAP is per-(doc, template) so each pair
+    # uses its template's own ``llm_id``. REDUCE / PLAN / REFINE are
+    # KB-wide and need exactly one model — we pick the first eligible
+    # template's ``llm_id`` as the canonical KB chat model.
+    llm_bundle_cache: dict[str, LLMBundle] = {}
+
+    def _bundle_for(llm_id: str | None) -> LLMBundle:
+        key = (llm_id or "").strip() or "__tenant_default__"
+        cached = llm_bundle_cache.get(key)
+        if cached is not None:
+            return cached
+        try:
+            if key == "__tenant_default__":
+                cfg = get_tenant_default_model_by_type(ctx.tenant_id, LLMType.CHAT)
+            else:
+                cfg = get_model_config_from_provider_instance(
+                    ctx.tenant_id,
+                    LLMType.CHAT,
+                    key,
+                )
+        except Exception:
+            logging.exception(
+                "wiki: chat model resolution failed for llm_id=%s (kb=%s); falling back to tenant default",
+                key,
+                ctx.kb_id,
+            )
+            cfg = get_tenant_default_model_by_type(ctx.tenant_id, LLMType.CHAT)
+            key = "__tenant_default__"
+            cached = llm_bundle_cache.get(key)
+            if cached is not None:
+                return cached
+        bundle = LLMBundle(ctx.tenant_id, cfg, lang=ctx.language)
+        llm_bundle_cache[key] = bundle
+        return bundle
+
+    def _stage_cb(prefix: str):
+        """Return a progress callback that prepends ``prefix`` to each message."""
+        def _cb(*args, **kwargs):
+            try:
+                if args and isinstance(args[0], (int, float)):
+                    text = args[1] if len(args) > 1 else kwargs.get("msg", "")
+                else:
+                    text = kwargs.get("msg") or (args[0] if args else "")
+                progress(msg=f"{prefix} {text}")
+            except Exception:
+                logging.exception("wiki: progress callback failed")
+
+        return _cb
+
+    # 4. MAP per eligible doc. Each MAP call's own resume mechanism
+    # (artifact_map_extract rows keyed by chunk_id) skips chunks that
+    # were already processed in a prior run — this is the incremental
+    # behavior the user asked for.
+    #
+    # ``kb_chat_llm_id`` is captured from the first eligible template
+    # and used as the canonical chat model for REDUCE/PLAN/REFINE.
+    # ``kb_writer_example`` follows the same first-template-wins rule:
+    # the REFINE writer's page-structure section is pulled from the
+    # first eligible artifact template's ``parser_config.example``
+    # override (None falls back to the built-in ``WIKI_TEMPLATE_EXAMPLE``).
+    kb_chat_llm_id: Optional[str] = None
+    kb_writer_example: Optional[str] = None
+    n_docs = len(eligible)
+    for i, (doc, template_id) in enumerate(eligible):
+        doc_id = doc["id"]
+        progress(
+            0.05 + 0.6 * (i / n_docs),
+            f"MAP {i + 1}/{n_docs}: {doc.get('name', doc_id)}",
+        )
+
+        template = CompilationTemplateService.get_saved(template_id, ctx.tenant_id)
+        if not template:
+            logging.warning(
+                "artifact: template %s not found for doc %s; skipping",
+                template_id,
+                doc_id,
+            )
+            continue
+        parser_cfg = template.get("config") or {}
+
+        map_llm_id = (parser_cfg.get("llm_id") or "").strip() if isinstance(parser_cfg, dict) else ""
+        map_chat_mdl = _bundle_for(map_llm_id)
+        if kb_chat_llm_id is None:
+            # First eligible template wins — canonical for KB-wide
+            # REDUCE/PLAN/REFINE further down.
+            kb_chat_llm_id = map_llm_id or None
+            tmpl_example = parser_cfg.get("example") if isinstance(parser_cfg, dict) else None
+            if isinstance(tmpl_example, str) and tmpl_example.strip():
+                kb_writer_example = tmpl_example
+
+        # Stream the doc's chunks in batches and call MAP per batch so
+        # peak memory stays bounded for long docs.
+        agg = {"entities": 0, "concepts": 0, "claims": 0, "relations": 0}
+        agg_delta = {"new": 0, "changed": 0, "unchanged": 0, "deleted": 0}
+        doc_had_delta = False
+        saw_any = False
+        batch_no = 0
+        async for batch in load_chunks_for_doc(
+            ctx.tenant_id,
+            ctx.kb_id,
+            doc_id,
+            batch_size=WIKI_MAP_BATCH_CHUNKS,
+        ):
+            saw_any = True
+            batch_no += 1
+            try:
+                phase1 = await wiki_map_from_chunks(
+                    chunks=batch,
+                    chat_mdl=map_chat_mdl,
+                    embd_mdl=embedding_model,
+                    doc_id=doc_id,
+                    tenant_id=ctx.tenant_id,
+                    kb_id=ctx.kb_id,
+                    language=ctx.language,
+                    callback=_stage_cb(f"[wiki MAP {i + 1}/{n_docs} b{batch_no}]"),
+                    parser_config=parser_cfg,
+                    batch_size_cap=8,
+                    window_fraction=0.5,
+                )
+            except Exception:
+                logging.exception(
+                    "wiki: MAP failed for doc %s batch %d",
+                    doc_id,
+                    batch_no,
+                )
+                continue
+            for k in agg.keys():
+                agg[k] += len(phase1.get(k) or [])
+            meta = phase1.get("_meta") or {}
+            if isinstance(meta, dict):
+                for k in agg_delta.keys():
+                    agg_delta[k] += int(meta.get(k, 0) or 0)
+                if meta.get("had_delta"):
+                    doc_had_delta = True
+
+        if not saw_any:
+            logging.info("wiki: no chunks for doc %s; skipping", doc_id)
+            continue
+        logging.info(
+            "wiki: MAP doc=%s entities=%d concepts=%d claims=%d relations=%d (batches=%d, new=%d changed=%d unchanged=%d deleted=%d, delta=%s)",
+            doc_id,
+            agg["entities"],
+            agg["concepts"],
+            agg["claims"],
+            agg["relations"],
+            batch_no,
+            agg_delta["new"],
+            agg_delta["changed"],
+            agg_delta["unchanged"],
+            agg_delta["deleted"],
+            doc_had_delta,
+        )
+
+    # 5. REDUCE / PLAN / REFINE KB-wide. Each phase has its own
+    # input_hash gate (REDUCE keys off the MAP-state hash, PLAN off
+    # REDUCE's hash, REFINE off PLAN's hash) so re-runs without an
+    # upstream delta short-circuit at the cache layer.
+    kb_chat_mdl = _bundle_for(kb_chat_llm_id)
+    try:
+        progress(0.65, "Reducing extracts KB-wide...")
+        await wiki_reduce_from_extracts(
+            chat_mdl=kb_chat_mdl,
+            embd_mdl=embedding_model,
+            tenant_id=ctx.tenant_id,
+            kb_id=ctx.kb_id,
+            callback=_stage_cb("[wiki REDUCE]"),
+        )
+
+        progress(0.75, "Planning wiki pages...")
+        await wiki_plan_from_reduction(
+            chat_mdl=kb_chat_mdl,
+            embd_mdl=embedding_model,
+            tenant_id=ctx.tenant_id,
+            kb_id=ctx.kb_id,
+            kb_name=kb_name,
+            kb_description=kb_description,
+            callback=_stage_cb("[wiki PLAN]"),
+        )
+
+        progress(0.85, "Refining pages...")
+        pages = await wiki_refine_from_plan(
+            chat_mdl=kb_chat_mdl,
+            embd_mdl=embedding_model,
+            tenant_id=ctx.tenant_id,
+            kb_id=ctx.kb_id,
+            callback=_stage_cb("[wiki REFINE]"),
+            example=kb_writer_example,
+        )
+    except Exception:
+        logging.exception("wiki: REDUCE/PLAN/REFINE failed for kb %s", ctx.kb_id)
+        progress(-1, "Wiki pipeline failed during REDUCE/PLAN/REFINE.")
+        return
+
+    # 6. Persist searchable artifact_page rows.
+    try:
+        await persist_wiki_pages_to_es(ctx, pages or [], embedding_model)
+    except Exception:
+        logging.exception("wiki: ES persist failed for kb %s", ctx.kb_id)
+
+    # 7. Materialize the canvas graph from the refined pages.
+    try:
+        await persist_wiki_page_graph_to_es(ctx, pages or [])
+    except Exception:
+        logging.exception("wiki: page-graph persist failed for kb %s", ctx.kb_id)
+
+    progress(1.0, f"Wiki compiled {len(pages or [])} page(s).")
diff --git a/rag/svr/task_executor_refactor/raptor_service.py b/rag/svr/task_executor_refactor/raptor_service.py
index c1695b9958..b72c952275 100644
--- a/rag/svr/task_executor_refactor/raptor_service.py
+++ b/rag/svr/task_executor_refactor/raptor_service.py
@@ -21,12 +21,14 @@ Provides [`RaptorService`](rag/svr/task_executor_refactor/raptor_service.py:48)
 """
 
 import copy
+import json
 import logging
 import os
 from datetime import datetime
 from typing import Dict, List, Optional, Set, Tuple
 
 import numpy as np
+import xxhash
 
 from api.db.services.document_service import DocumentService
 from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID
@@ -48,6 +50,31 @@ from rag.utils.raptor_utils import (
 from rag.svr.task_executor_refactor.task_context import TaskContext
 
 
+def _sum_tree_text_tokens(tree) -> int:
+    """Total the token counts of all non-empty string ``title`` values in a tree.
+
+    Visits ``tree`` and every dict reachable through list-valued
+    ``children`` keys using an explicit work list (no recursion), and
+    sums ``num_tokens_from_string(title)``. Non-dict nodes are ignored;
+    a non-dict ``tree`` yields 0.
+    """
+    if not isinstance(tree, dict):
+        return 0
+    token_sum = 0
+    pending = [tree]
+    # Visit order does not matter here: only the grand total is returned.
+    while pending:
+        current = pending.pop()
+        if isinstance(current, dict):
+            heading = current.get("title")
+            if isinstance(heading, str) and heading:
+                token_sum += num_tokens_from_string(heading)
+            kids = current.get("children")
+            if isinstance(kids, list):
+                pending += kids
+    return token_sum
+
+
 class RaptorService:
     """Service for RAPTOR summary generation.
 
@@ -106,15 +133,11 @@ class RaptorService:
         # Determine scope
         if raptor_config.get("scope", "file") == "file":
             res, tk_count = await self._run_file_level_raptor(
-                raptor_config, tree_builder, clustering_method,
-                chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id,
-                max_errors, res, tk_count, cleanup_raptor_chunks
+                raptor_config, tree_builder, clustering_method, chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id, max_errors, res, tk_count, cleanup_raptor_chunks
             )
         else:
             res, tk_count = await self._run_dataset_level_raptor(
-                raptor_config, tree_builder, clustering_method,
-                chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id,
-                max_errors, res, tk_count, cleanup_raptor_chunks
+                raptor_config, tree_builder, clustering_method, chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id, max_errors, res, tk_count, cleanup_raptor_chunks
             )
 
         return res, tk_count, cleanup_raptor_chunks
@@ -135,15 +158,11 @@ class RaptorService:
             }
         return doc_info_by_id
 
-    async def _run_file_level_raptor(
-        self, raptor_config, tree_builder, clustering_method,
-        chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id,
-        max_errors, res, tk_count, cleanup_raptor_chunks
-    ):
+    async def _run_file_level_raptor(self, raptor_config, tree_builder, clustering_method, chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id, max_errors, res, tk_count, cleanup_raptor_chunks):
         """Run RAPTOR at file level (per document)."""
         ctx = self._task_context
         fake_doc_id = GRAPH_RAPTOR_FAKE_DOC_ID
-        if self._task_context.write_interceptor: # dry run mode
+        if self._task_context.write_interceptor:  # dry run mode
             dataset_methods = set()
         else:
             dataset_methods = await self._get_raptor_chunk_methods(fake_doc_id, ctx.tenant_id, ctx.kb_id)
@@ -155,7 +174,7 @@ class RaptorService:
 
         for x, doc_id in enumerate(doc_ids):
             if self._should_skip_raptor(doc_id, doc_info_by_id, raptor_config):
-                self._task_context.progress_cb(prog=(x + 1.) / len(doc_ids))
+                self._task_context.progress_cb(prog=(x + 1.0) / len(doc_ids))
                 continue
             if self._task_context.write_interceptor:
                 existing_methods = set()
@@ -164,12 +183,10 @@ class RaptorService:
             if tree_builder in existing_methods:
                 has_file_level_target = True
                 if existing_methods != {tree_builder}:
-                    self._schedule_raptor_cleanup(
-                        doc_id, tree_builder, cleanup_raptor_chunks
-                    )
+                    self._schedule_raptor_cleanup(doc_id, tree_builder, cleanup_raptor_chunks)
                     self._task_context.progress_cb(msg=f"[RAPTOR] doc:{doc_id} will remove old RAPTOR summaries after insert.")
                 self._task_context.progress_cb(msg=f"[RAPTOR] doc:{doc_id} already has {tree_builder} RAPTOR chunks, skipping.")
-                self._task_context.progress_cb(prog=(x + 1.) / len(doc_ids))
+                self._task_context.progress_cb(prog=(x + 1.0) / len(doc_ids))
                 continue
 
             if existing_methods:
@@ -180,36 +197,25 @@ class RaptorService:
                 continue
 
             before_generate = len(res)
-            new_chunks, new_tk_count = await self._generate_raptor(
-                chunks, doc_id, raptor_config, chat_mdl, embd_mdl,
-                tree_builder, clustering_method, max_errors, doc_info_by_id
-            )
+            new_chunks, new_tk_count = await self._generate_raptor(chunks, doc_id, raptor_config, chat_mdl, embd_mdl, tree_builder, clustering_method, max_errors, doc_info_by_id)
             res.extend(new_chunks)
             tk_count += new_tk_count
 
             if len(res) > before_generate:
                 has_file_level_target = True
                 if existing_methods:
-                    self._schedule_raptor_cleanup(
-                        doc_id, tree_builder, cleanup_raptor_chunks
-                    )
-            self._task_context.progress_cb(prog=(x + 1.) / len(doc_ids))
+                    self._schedule_raptor_cleanup(doc_id, tree_builder, cleanup_raptor_chunks)
+            self._task_context.progress_cb(prog=(x + 1.0) / len(doc_ids))
 
         if remove_dataset_summaries:
             if has_file_level_target:
-                self._schedule_raptor_cleanup(
-                    fake_doc_id, None, cleanup_raptor_chunks
-                )
+                self._schedule_raptor_cleanup(fake_doc_id, None, cleanup_raptor_chunks)
             else:
                 self._task_context.progress_cb(msg="[RAPTOR] kept dataset-level summaries because no file-level summaries were built.")
 
         return res, tk_count
 
-    async def _run_dataset_level_raptor(
-        self, raptor_config, tree_builder, clustering_method,
-        chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id,
-        max_errors, res, tk_count, cleanup_raptor_chunks
-    ):
+    async def _run_dataset_level_raptor(self, raptor_config, tree_builder, clustering_method, chat_mdl, embd_mdl, vctr_nm, doc_ids, doc_info_by_id, max_errors, res, tk_count, cleanup_raptor_chunks):
         """Run RAPTOR at dataset level (all documents combined)."""
         ctx = self._task_context
         fake_doc_id = GRAPH_RAPTOR_FAKE_DOC_ID
@@ -230,9 +236,7 @@ class RaptorService:
                 migrated_file_docs += 1
 
         if migrated_file_docs:
-            self._task_context.progress_cb(
-                msg=f"[RAPTOR] will remove file-level summaries for {migrated_file_docs} docs after dataset-level build succeeds."
-            )
+            self._task_context.progress_cb(msg=f"[RAPTOR] will remove file-level summaries for {migrated_file_docs} docs after dataset-level build succeeds.")
 
         if self._task_context.write_interceptor:
             existing_methods = set()
@@ -240,9 +244,7 @@ class RaptorService:
             existing_methods = await self._get_raptor_chunk_methods(fake_doc_id, ctx.tenant_id, ctx.kb_id)
         if tree_builder in existing_methods:
             if existing_methods != {tree_builder}:
-                self._schedule_raptor_cleanup(
-                    fake_doc_id, tree_builder, cleanup_raptor_chunks
-                )
+                self._schedule_raptor_cleanup(fake_doc_id, tree_builder, cleanup_raptor_chunks)
                 self._task_context.progress_cb(msg="[RAPTOR] will remove old dataset-level RAPTOR summaries after insert.")
             for doc_id in file_cleanup_doc_ids:
                 self._schedule_raptor_cleanup(doc_id, None, cleanup_raptor_chunks)
@@ -262,10 +264,7 @@ class RaptorService:
             return res, tk_count
 
         before_generate = len(res)
-        new_chunks, new_tk_count = await self._generate_raptor(
-            chunks, fake_doc_id, raptor_config, chat_mdl, embd_mdl,
-            tree_builder, clustering_method, max_errors, doc_info_by_id
-        )
+        new_chunks, new_tk_count = await self._generate_raptor(chunks, fake_doc_id, raptor_config, chat_mdl, embd_mdl, tree_builder, clustering_method, max_errors, doc_info_by_id)
         res.extend(new_chunks)
         tk_count += new_tk_count
 
@@ -273,15 +272,11 @@ class RaptorService:
             for doc_id in file_cleanup_doc_ids:
                 self._schedule_raptor_cleanup(doc_id, None, cleanup_raptor_chunks)
             if migrate_dataset_summaries:
-                self._schedule_raptor_cleanup(
-                    fake_doc_id, tree_builder, cleanup_raptor_chunks
-                )
+                self._schedule_raptor_cleanup(fake_doc_id, tree_builder, cleanup_raptor_chunks)
 
         return res, tk_count
 
-    def _should_skip_raptor(
-        self, doc_id: str, doc_info_by_id: Dict, raptor_config: Dict
-    ) -> bool:
+    def _should_skip_raptor(self, doc_id: str, doc_info_by_id: Dict, raptor_config: Dict) -> bool:
         """Check if RAPTOR should be skipped for a document."""
         ctx = self._task_context
         doc_info = doc_info_by_id.get(doc_id, {})
@@ -297,67 +292,64 @@ class RaptorService:
             return True
         return False
 
-    def _load_doc_chunks(self, doc_id: str, vctr_nm: str) -> List[Tuple[str, np.ndarray]]:
-        """Load chunks for a single document."""
+    def _load_doc_chunks(self, doc_id: str, vctr_nm: str) -> List[Tuple[str, np.ndarray, str]]:
+        """Load chunks for a single document.
+
+        Returns ``(content, vector, chunk_id)`` triples so downstream
+        RAPTOR can attach ``source_chunk_ids`` provenance onto every
+        summary it produces. ``chunk_id`` may be an empty string if the
+        retriever didn't surface one — defensive against legacy rows.
+        """
         ctx = self._task_context
-        chunks = []
+        chunks: List[Tuple[str, np.ndarray, str]] = []
         skipped_chunks = 0
 
-        fields = ["content_with_weight", vctr_nm]
-        for d in settings.retriever.chunk_list(
-            doc_id, ctx.tenant_id, [str(ctx.kb_id)],
-            fields=fields,
-            sort_by_position=True
-        ):
+        # ``id`` is included so the source-chunk provenance survives
+        # through summarization; the retriever otherwise drops it when
+        # ``fields`` is provided.
+        fields = ["id", "content_with_weight", vctr_nm]
+        for d in settings.retriever.chunk_list(doc_id, ctx.tenant_id, [str(ctx.kb_id)], fields=fields, sort_by_position=True):
             if vctr_nm not in d or d[vctr_nm] is None:
                 skipped_chunks += 1
                 logging.warning(f"RAPTOR: Chunk missing vector field '{vctr_nm}' in doc {doc_id}, skipping")
                 continue
-            chunks.append((d["content_with_weight"], np.array(d[vctr_nm])))
+            chunks.append((d["content_with_weight"], np.array(d[vctr_nm]), str(d.get("id") or "")))
 
         if skipped_chunks > 0:
-            self._task_context.progress_cb(
-                msg=f"[WARN] Skipped {skipped_chunks} chunks without vector field '{vctr_nm}' for doc {doc_id}."
-            )
+            self._task_context.progress_cb(msg=f"[WARN] Skipped {skipped_chunks} chunks without vector field '{vctr_nm}' for doc {doc_id}.")
         if not chunks:
             logging.warning(f"RAPTOR: No valid chunks with vectors found for doc {doc_id}")
             self._task_context.progress_cb(msg=f"[WARN] No valid chunks with vectors found for doc {doc_id}, skipping")
 
         return chunks
 
-    def _load_all_doc_chunks(
-        self, doc_ids: List[str], vctr_nm: str, skipped_doc_ids: Set[str]
-    ) -> List[Tuple[str, np.ndarray]]:
-        """Load chunks for all documents."""
+    def _load_all_doc_chunks(self, doc_ids: List[str], vctr_nm: str, skipped_doc_ids: Set[str]) -> List[Tuple[str, np.ndarray, str]]:
+        """Load chunks for all documents — returns provenance-carrying
+        ``(content, vector, chunk_id)`` triples. See ``_load_doc_chunks``
+        for the per-doc variant."""
         ctx = self._task_context
-        chunks = []
+        chunks: List[Tuple[str, np.ndarray, str]] = []
         skipped_chunks = 0
 
-        fields = ["content_with_weight", vctr_nm]
+        fields = ["id", "content_with_weight", vctr_nm]
         for doc_id in doc_ids:
             if doc_id in skipped_doc_ids:
                 continue
-            for d in settings.retriever.chunk_list(
-                doc_id, ctx.tenant_id, [str(ctx.kb_id)],
-                fields=fields,
-                sort_by_position=True
-            ):
+            for d in settings.retriever.chunk_list(doc_id, ctx.tenant_id, [str(ctx.kb_id)], fields=fields, sort_by_position=True):
                 if vctr_nm not in d or d[vctr_nm] is None:
                     skipped_chunks += 1
                     logging.warning(f"RAPTOR: Chunk missing vector field '{vctr_nm}' in doc {doc_id}, skipping")
                     continue
-                chunks.append((d["content_with_weight"], np.array(d[vctr_nm])))
+                chunks.append((d["content_with_weight"], np.array(d[vctr_nm]), str(d.get("id") or "")))
 
         if skipped_chunks > 0:
-            self._task_context.progress_cb(
-                msg=f"[WARN] Skipped {skipped_chunks} chunks without vector field '{vctr_nm}'."
-            )
+            self._task_context.progress_cb(msg=f"[WARN] Skipped {skipped_chunks} chunks without vector field '{vctr_nm}'.")
 
         return chunks
 
     async def _generate_raptor(
         self,
-        chunks: List[Tuple[str, np.ndarray]],
+        chunks: List[Tuple[str, np.ndarray, str]],
         doc_id: str,
         raptor_config: Dict,
         chat_mdl,
@@ -366,10 +358,19 @@ class RaptorService:
         clustering_method: str,
         max_errors: int,
         doc_info_by_id: Dict,
+        is_tree: bool = False,
     ) -> Tuple[List[Dict], int]:
-        """Run RAPTOR and generate summary chunks."""
+        """Run RAPTOR and generate summary chunks.
+
+        ``chunks`` is the provenance-carrying triple shape produced by
+        ``_load_doc_chunks`` / ``_load_all_doc_chunks``:
+        ``(content, vector, chunk_id)``. Each leaf is wrapped into the
+        ``(text, vec, [chunk_id])`` shape RAPTOR expects so every
+        summary it produces carries the order-preserving deduped union
+        of the leaf ids underneath it.
+        """
         ctx = self._task_context
-        from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
+        from rag.advanced_rag.knowlege_compile.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
 
         raptor_ext_config = raptor_config.get("ext") or {}
         assert chunks, "_generate_raptor must not be called with empty chunks"
@@ -389,13 +390,173 @@ class RaptorService:
             psi_bucket_size=raptor_ext_config.get("psi_bucket_size", 1024),
         )
 
-        original_length = len(chunks)
-        processed_chunks, layers = await raptor(
-            chunks, raptor_config["random_seed"], self._task_context.progress_cb, ctx.id
-        )
+        # Seed each leaf with its own id as the start of its
+        # ``source_chunk_ids`` provenance trail. The id may be empty
+        # for malformed retriever rows; ``Raptor.__call__`` filters
+        # those out of the union on the inbound normalize step.
+        raptor_input = [(content, vctr, [chunk_id] if chunk_id else []) for content, vctr, chunk_id in chunks]
 
         effective_doc_name = ctx.name if doc_id == GRAPH_RAPTOR_FAKE_DOC_ID else doc_info_by_id.get(doc_id, {}).get("name") or ctx.name
 
+        # Tree path (``is_tree=True``; the parameter defaults to False):
+        # ask RAPTOR for a single hierarchical tree dict and persist it
+        # as ONE non-searchable ES row. PSI's hyperedge-driven
+        # summarization can't form a strict parent-of relation, so
+        # __call__(is_tree=True) raises NotImplementedError there — catch
+        # and fall back to ``_generate_raptor_legacy_rows`` for that case.
+        original_length = len(chunks)
+        try:
+            processed_chunks, layers = await raptor(
+                raptor_input,
+                raptor_config["random_seed"],
+                self._task_context.progress_cb,
+                ctx.id,
+                is_tree=is_tree,
+            )
+        except NotImplementedError:
+            return await self._generate_raptor_legacy_rows(
+                raptor,
+                raptor_input,
+                raptor_config,
+                doc_id,
+                effective_doc_name,
+                tree_builder,
+                vctr_nm,
+            )
+
+        if processed_chunks is None:
+            return [], 0
+        doc = {
+            "doc_id": doc_id,
+            "kb_id": [str(ctx.kb_id)],
+            "docnm_kwd": effective_doc_name,
+            "title_tks": rag_tokenizer.tokenize(effective_doc_name),
+            "raptor_kwd": "raptor",
+            "extra": {"raptor_method": tree_builder},
+            "create_time": str(datetime.now()).replace("T", " ")[:19],
+            "create_timestamp_flt": datetime.now().timestamp(),
+        }
+        if ctx.pagerank:
+            doc[PAGERANK_FLD] = int(ctx.pagerank)
+
+        if not is_tree:
+            # Build index→layer mapping
+            chunk_layer = {}
+            for layer_idx, (layer_start, layer_end) in enumerate(layers):
+                if layer_idx == 0:
+                    continue
+                for ci in range(layer_start, layer_end):
+                    chunk_layer[ci] = layer_idx
+
+            res = []
+            tk_count = 0
+            for idx, (content, vctr, _, _) in enumerate(processed_chunks[original_length:], start=original_length):
+                d = copy.deepcopy(doc)
+                d["id"] = make_raptor_summary_chunk_id(content, doc_id)
+                d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
+                d["create_timestamp_flt"] = datetime.now().timestamp()
+                d[vctr_nm] = vctr.tolist()
+                d["content_with_weight"] = content
+                d["content_ltks"] = rag_tokenizer.tokenize(content)
+                d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
+                d["raptor_layer_int"] = chunk_layer.get(idx, 1)
+                res.append(d)
+                tk_count += num_tokens_from_string(content)
+            return res, tk_count
+
+        row_id = xxhash.xxh64(
+            f"raptor_tree:{doc_id}:{tree_builder}".encode("utf-8", "surrogatepass"),
+        ).hexdigest()
+        row = {
+            **doc,
+            "id": row_id,
+            "raptor_kwd": "raptor_tree",
+            "content_with_weight": json.dumps(processed_chunks, ensure_ascii=False),
+            "available_int": 0,
+        }
+        return [row], _sum_tree_text_tokens(processed_chunks)
+
+    async def build_doc_tree(
+        self,
+        chunks: List[Tuple[str, np.ndarray, str]],
+        raptor_config: Dict,
+        chat_mdl,
+        embd_mdl,
+        tree_builder: str,
+        clustering_method: str,
+        max_errors: int,
+    ) -> Optional[Dict]:
+        """Run RAPTOR in tree mode over one document's chunks — no ES IO here.
+
+        Intended for the ``tree``-kind compilation template, which wraps the
+        returned tree into a per-template structure-graph row.
+
+        Returns the tree dict, or None when ``chunks`` is empty, RAPTOR raises
+        ``NotImplementedError`` (PSI builder: no strict tree), or the result
+        is not a dict. Any other exception propagates to the caller.
+        """
+        if not chunks:
+            return None
+        from rag.advanced_rag.knowlege_compile.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
+
+        ext_cfg = raptor_config.get("ext") or {}
+        builder = Raptor(
+            raptor_config.get("max_cluster", 64),
+            chat_mdl,
+            embd_mdl,
+            raptor_config["prompt"],
+            raptor_config["max_token"],
+            raptor_config["threshold"],
+            max_errors=max_errors,
+            tree_builder=tree_builder,
+            clustering_method=clustering_method,
+            psi_exact_max_leaves=ext_cfg.get("psi_exact_max_leaves", 4096),
+            psi_bucket_size=ext_cfg.get("psi_bucket_size", 1024),
+        )
+
+        leaves = [(text, vec, [cid] if cid else []) for text, vec, cid in chunks]
+        try:
+            result, _ = await builder(
+                leaves,
+                raptor_config["random_seed"],
+                self._task_context.progress_cb,
+                self._task_context.id,
+                is_tree=True,
+            )
+        except NotImplementedError:
+            # PSI builder has no tree mode; report "no tree" so the
+            # compilation-template path can skip this doc cleanly.
+            logging.warning("build_doc_tree: PSI builder doesn't support is_tree; skipping")
+            return None
+        return result if isinstance(result, dict) else None
+
+    async def _generate_raptor_legacy_rows(
+        self,
+        raptor,
+        raptor_input,
+        raptor_config,
+        doc_id,
+        effective_doc_name,
+        tree_builder,
+        vctr_nm,
+    ) -> Tuple[List[Dict], int]:
+        """Legacy per-summary materialization, kept only for PSI builds.
+
+        PSI's hyperedge summaries don't map to a strict tree, so a
+        ``_generate_raptor`` call made with ``is_tree=True`` (it is
+        False by default) raises ``NotImplementedError`` and lands
+        here. Output is one ES row per appended summary, marked
+        ``raptor_kwd="raptor"``, as before the tree migration.
+        """
+        ctx = self._task_context
+        original_length = len(raptor_input)
+        processed_chunks, layers = await raptor(
+            raptor_input,
+            raptor_config["random_seed"],
+            self._task_context.progress_cb,
+            ctx.id,
+        )
+
         doc = {
             "doc_id": doc_id,
             "kb_id": [str(ctx.kb_id)],
@@ -407,7 +568,6 @@ class RaptorService:
         if ctx.pagerank:
             doc[PAGERANK_FLD] = int(ctx.pagerank)
 
-        # Build index→layer mapping
         chunk_layer = {}
         for layer_idx, (layer_start, layer_end) in enumerate(layers):
             if layer_idx == 0:
@@ -417,7 +577,12 @@ class RaptorService:
 
         res = []
         tk_count = 0
-        for idx, (content, vctr) in enumerate(processed_chunks[original_length:], start=original_length):
+        for idx, item in enumerate(processed_chunks[original_length:], start=original_length):
+            if len(item) >= 3:
+                content, vctr, source_chunk_ids = item[0], item[1], item[2] or []
+            else:
+                content, vctr = item[0], item[1]
+                source_chunk_ids = []
             d = copy.deepcopy(doc)
             d["id"] = make_raptor_summary_chunk_id(content, doc_id)
             d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
@@ -427,6 +592,8 @@ class RaptorService:
             d["content_ltks"] = rag_tokenizer.tokenize(content)
             d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
             d["raptor_layer_int"] = chunk_layer.get(idx, 1)
+            if source_chunk_ids:
+                d["source_chunk_ids"] = list(source_chunk_ids)
             res.append(d)
             tk_count += num_tokens_from_string(content)
 
@@ -445,16 +612,17 @@ class RaptorService:
         from common.doc_store.doc_store_base import OrderByExpr
 
         async def search_fields(fields: list, condition: dict, order_by=None):
-            res = await thread_pool_exec(
-                settings.docStoreConn.search,
-                fields, [], condition, [], order_by or OrderByExpr(),
-                0, 10000, search.index_name(tenant_id), [kb_id]
-            )
+            res = await thread_pool_exec(settings.docStoreConn.search, fields, [], condition, [], order_by or OrderByExpr(), 0, 10000, search.index_name(tenant_id), [kb_id])
             return settings.docStoreConn.get_fields(res, fields)
 
         try:
+            # Accept both ``raptor`` (legacy per-summary rows, PSI
+            # builder still produces these) and ``raptor_tree`` (new
+            # single-row tree blob) so existing-method detection stays
+            # accurate across the migration.
             primary = await search_fields(
-                ["raptor_kwd", "extra"], {"doc_id": doc_id, "raptor_kwd": ["raptor"]}
+                ["raptor_kwd", "extra"],
+                {"doc_id": doc_id, "raptor_kwd": ["raptor", "raptor_tree"]},
             )
             if collect_raptor_chunk_ids(primary):
                 return collect_raptor_methods(primary)
@@ -469,3 +637,188 @@ class RaptorService:
         except Exception:
             logging.exception("Failed to check RAPTOR chunks for doc %s", doc_id)
             raise
+
+    @staticmethod
+    def _build_raptor_graph(rows: List[Dict]) -> Dict:
+        """Project loaded RAPTOR summary rows onto the canvas graph shape.
+
+        Each row contributes one entity::
+
+            {
+              "id":          xxh128(content)           # 32-char hex
+              "name":        first 16 whitespace tokens
+              "description": content_with_weight
+              "source_chunk_ids": row.source_chunk_ids
+            }
+
+        Relations: full bipartite layer-by-layer fan-out — every node at
+        layer K gets an edge to every node at layer K-1 (because we only
+        loaded ``content_with_weight`` + ``raptor_layer_int`` we don't
+        have the specific parent linkage). Self-edges and dangling
+        targets are dropped (the latter only matters if the layer-int
+        values are non-contiguous).
+        """
+        # Build entities. Dedup by id so two identical-content summaries
+        # collapse to one node — the canvas can't render multiple nodes
+        # at the same id anyway, and identical content is a defensible
+        # collapse.
+        by_id: Dict[str, Dict] = {}
+        by_layer: Dict[int, List[str]] = {}
+
+        for row in rows:
+            content = row.get("content_with_weight")
+            if not isinstance(content, str) or not content.strip():
+                continue
+            try:
+                layer = int(row.get("raptor_layer_int") or 0)
+            except (TypeError, ValueError):
+                layer = 0
+            if layer <= 0:
+                # Layer 0 would be the original leaf chunks; RAPTOR
+                # summaries start at layer 1. Anything claiming layer 0
+                # here is malformed; skip.
+                continue
+
+            name = " ".join(content.split()[:16])
+            nid = xxhash.xxh128(
+                content.encode("utf-8", "surrogatepass"),
+            ).hexdigest()  # 32-char hex
+            if nid in by_id:
+                continue
+            source_chunk_ids = row.get("source_chunk_ids") or []
+            if not isinstance(source_chunk_ids, list):
+                source_chunk_ids = []
+            by_id[nid] = {
+                "id": nid,
+                "name": name,
+                "description": content,
+                "source_chunk_ids": list(source_chunk_ids),
+            }
+            by_layer.setdefault(layer, []).append(nid)
+
+        # Layered fan-out from parent (higher layer) → child (lower layer).
+        relations: List[Dict] = []
+        layers_sorted = sorted(by_layer.keys())
+        for layer in layers_sorted:
+            child_layer = layer - 1
+            if child_layer not in by_layer:
+                continue
+            for parent in by_layer[layer]:
+                for child in by_layer[child_layer]:
+                    if parent == child:
+                        continue
+                    relations.append({"from": parent, "to": child})
+
+        return {"entities": list(by_id.values()), "relations": relations}
+
+    async def _persist_raptor_graph_to_es(self, doc_id: str) -> None:
+        """Load the just-inserted RAPTOR summaries for ``doc_id`` and
+        persist a single graph row that the dataset structure-graph
+        endpoint can surface as a tree.
+
+        Loads only ``content_with_weight``, ``raptor_layer_int`` and
+        ``source_chunk_ids`` (per the smallest-payload contract) and
+        writes one row with::
+
+            compile_kwd:                  "raptor_graph"
+            compilation_template_kind_kwd:"raptor"
+            doc_id:                       <doc_id>
+
+        The row id is deterministic per ``(kb_id, doc_id)`` so re-runs
+        delete-and-replace cleanly through the same primary key.
+        ``knowledge_graph_kwd`` is intentionally NOT set — that field
+        belongs to the KG feature; this row is identified via
+        ``compile_kwd`` so the two paths stay semantically distinct.
+        """
+        from common.doc_store.doc_store_base import OrderByExpr
+
+        ctx = self._task_context
+        tenant_id = ctx.tenant_id
+        kb_id_str = str(ctx.kb_id)
+        index_nm = search.index_name(tenant_id)
+        select_fields = ["content_with_weight", "raptor_layer_int", "source_chunk_ids"]
+        try:
+            res = await thread_pool_exec(
+                settings.docStoreConn.search,
+                select_fields,
+                [],
+                {"raptor_kwd": ["raptor"], "doc_id": [doc_id]},
+                [],
+                OrderByExpr(),
+                0,
+                10000,
+                index_nm,
+                [kb_id_str],
+            )
+            field_map = settings.docStoreConn.get_fields(res, select_fields)
+        except Exception:
+            logging.exception(
+                "raptor_graph: load failed for kb=%s doc=%s",
+                kb_id_str,
+                doc_id,
+            )
+            return
+
+        rows = list((field_map or {}).values())
+        if not rows:
+            logging.info(
+                "raptor_graph: no summaries to render for kb=%s doc=%s",
+                kb_id_str,
+                doc_id,
+            )
+            return
+
+        graph = self._build_raptor_graph(rows)
+        if not graph["entities"]:
+            logging.info(
+                "raptor_graph: projection produced no entities for kb=%s doc=%s",
+                kb_id_str,
+                doc_id,
+            )
+            return
+
+        row_id = xxhash.xxh64(
+            f"raptor_graph:{kb_id_str}:{doc_id}".encode("utf-8", "surrogatepass"),
+        ).hexdigest()
+        row = {
+            "id": row_id,
+            "kb_id": kb_id_str,
+            "doc_id": doc_id,
+            "compile_kwd": "raptor_graph",
+            "compilation_template_kind_kwd": "raptor",
+            "content_with_weight": json.dumps(graph, ensure_ascii=False),
+            "available_int": 0,
+        }
+        try:
+            await thread_pool_exec(
+                settings.docStoreConn.delete,
+                {"compile_kwd": "raptor_graph", "doc_id": [doc_id]},
+                index_nm,
+                ctx.kb_id,
+            )
+        except Exception:
+            logging.debug(
+                "raptor_graph: prior delete failed for kb=%s doc=%s; relying on id-upsert",
+                kb_id_str,
+                doc_id,
+            )
+        try:
+            await thread_pool_exec(
+                settings.docStoreConn.insert,
+                [row],
+                index_nm,
+                ctx.kb_id,
+            )
+            logging.info(
+                "raptor_graph: stored %d entities / %d relations for kb=%s doc=%s",
+                len(graph["entities"]),
+                len(graph["relations"]),
+                kb_id_str,
+                doc_id,
+            )
+        except Exception:
+            logging.exception(
+                "raptor_graph: insert failed for kb=%s doc=%s",
+                kb_id_str,
+                doc_id,
+            )
diff --git a/rag/svr/task_executor_refactor/raptor_utils.py b/rag/svr/task_executor_refactor/raptor_utils.py
index f98975bfb2..9b423c48d9 100644
--- a/rag/svr/task_executor_refactor/raptor_utils.py
+++ b/rag/svr/task_executor_refactor/raptor_utils.py
@@ -38,11 +38,7 @@ async def get_raptor_chunk_field_map(doc_id: str, tenant_id: str, kb_id: str) ->
 
     async def search_fields(fields: list[str], condition: dict, order_by=None):
         """Search chunk fields in the current knowledge base."""
-        res = await thread_pool_exec(
-            settings.docStoreConn.search,
-            fields, [], condition, [], order_by or OrderByExpr(),
-            0, RAPTOR_METHOD_SEARCH_LIMIT, nlp_search.index_name(tenant_id), [kb_id]
-        )
+        res = await thread_pool_exec(settings.docStoreConn.search, fields, [], condition, [], order_by or OrderByExpr(), 0, RAPTOR_METHOD_SEARCH_LIMIT, nlp_search.index_name(tenant_id), [kb_id])
         return settings.docStoreConn.get_fields(res, fields)
 
     primary = await search_fields(["raptor_kwd", "extra"], {"doc_id": doc_id, "raptor_kwd": ["raptor"]})
@@ -65,11 +61,17 @@ async def delete_raptor_chunks(doc_id: str, tenant_id: str, kb_id: str, keep_met
     if keep_method is None:
         logging.info(
             "delete_raptor_chunks: removing all RAPTOR summaries (doc=%s tenant=%s kb=%s)",
-            doc_id, tenant_id, kb_id,
+            doc_id,
+            tenant_id,
+            kb_id,
         )
+        # Sweep both row types — legacy per-summary (``raptor``, still
+        # used by the PSI builder) and the new single tree blob
+        # (``raptor_tree``) — so re-runs always start from a clean
+        # slate regardless of which path produced the prior state.
         await thread_pool_exec(
             settings.docStoreConn.delete,
-            {"doc_id": doc_id, "raptor_kwd": ["raptor"]},
+            {"doc_id": doc_id, "raptor_kwd": ["raptor", "raptor_tree"]},
             nlp_search.index_name(tenant_id),
             kb_id,
         )
@@ -80,13 +82,20 @@ async def delete_raptor_chunks(doc_id: str, tenant_id: str, kb_id: str, keep_met
     if not chunk_ids:
         logging.debug(
             "delete_raptor_chunks: no stale RAPTOR chunks to remove (doc=%s tenant=%s kb=%s keep=%s)",
-            doc_id, tenant_id, kb_id, keep_method,
+            doc_id,
+            tenant_id,
+            kb_id,
+            keep_method,
         )
         return 0
 
     logging.info(
         "delete_raptor_chunks: removing %d stale RAPTOR chunks (doc=%s tenant=%s kb=%s keep=%s)",
-        len(chunk_ids), doc_id, tenant_id, kb_id, keep_method,
+        len(chunk_ids),
+        doc_id,
+        tenant_id,
+        kb_id,
+        keep_method,
     )
     await thread_pool_exec(
         settings.docStoreConn.delete,
diff --git a/rag/svr/task_executor_refactor/task_handler.py b/rag/svr/task_executor_refactor/task_handler.py
index a22d407de4..73f160f096 100644
--- a/rag/svr/task_executor_refactor/task_handler.py
+++ b/rag/svr/task_executor_refactor/task_handler.py
@@ -23,20 +23,26 @@ for handling document processing tasks with refactored, testable methods.
 import asyncio
 import logging
 import json
+
+# Wiki / artifact compilation pipeline lives in
+# ``rag.svr.task_executor_refactor.dataset_wiki_generator`` — see the
+# ``task_type == "artifact"`` branch of ``TaskHandler.run`` for the
+# dispatch call.
+# Document-structure compilation helpers (CHAIN_KINDS,
+# compile_structure_from_text, merge_compiled_structures,
+# validate_and_correct_chain) moved to ``chunk_post_processor``.
 import xxhash
 
 from timeit import default_timer as timer
-from typing import Callable, Dict, List, Optional
+from typing import AsyncIterator, Callable, Dict, List, Optional
 
 from api.db.services.document_service import DocumentService
 from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.compilation_template_group_service import CompilationTemplateGroupService
 from api.db.joint_services.memory_message_service import handle_save_to_memory_task
-from api.db.joint_services.tenant_model_service import (
-    get_tenant_default_model_by_type,
-    get_model_config_from_provider_instance
-)
+from api.db.joint_services.tenant_model_service import get_tenant_default_model_by_type, get_model_config_from_provider_instance
 from api.db.services.llm_service import LLMBundle
-from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID
+from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID, abort_doc_chunking_counter
 from common.constants import LLMType
 from common.exceptions import TaskCanceledException
 from common.connection_utils import timeout
@@ -57,6 +63,96 @@ from rag.prompts.generator import run_toc_from_text
 from common import settings
 
 
+def _parser_config_compilation_template_group_ids(parser_config) -> list[str]:
+    """Read template-group ids from a doc's parser_config.
+
+    Templates were previously referenced as a list
+    (``compilation_template_ids``); after the template-group refactor
+    a doc instead points at one or more groups, and the orchestrator
+    resolves each group's child templates at runtime. Old
+    ``compilation_template_ids`` data is intentionally ignored per
+    the migration spec.
+    """
+
+    def _normalize(raw) -> list[str]:
+        if isinstance(raw, str):
+            raw = [raw]
+        if not isinstance(raw, list):
+            return []
+        ids: list[str] = []
+        seen: set[str] = set()
+        for gid in raw:
+            if not isinstance(gid, str):
+                continue
+            gid = gid.strip()
+            if gid and gid not in seen:
+                seen.add(gid)
+                ids.append(gid)
+        return ids
+
+    if not isinstance(parser_config, dict):
+        return []
+    if "compilation_template_group_id" in parser_config:
+        return _normalize(parser_config.get("compilation_template_group_id"))
+    ext = parser_config.get("ext")
+    if isinstance(ext, dict):
+        return _normalize(ext.get("compilation_template_group_id"))
+    return []
+
+
+def _parser_config_compilation_template_ids(parser_config, tenant_id: str) -> list[str]:
+    """Resolve a doc's parser_config to compile-template ids by
+    looking up configured groups. Returns ``[]`` if the doc has no
+    group set or no group can be resolved.
+    """
+    template_ids: list[str] = []
+    seen: set[str] = set()
+    for group_id in _parser_config_compilation_template_group_ids(parser_config):
+        for template_id in CompilationTemplateGroupService.resolve_template_ids(group_id, tenant_id):
+            if template_id in seen:
+                continue
+            seen.add(template_id)
+            template_ids.append(template_id)
+    return template_ids
+
+
+def _resolve_template_chat_llm_id(parser_cfg: dict, ctx) -> str:
+    """Pick the chat model id for a knowledge-compilation template.
+
+    Resolution order:
+      1. The template's own ``llm_id`` (what the user picked in the
+         compilation-template panel).
+      2. The doc's ``parser_config.llm_id`` (the doc-level chunking
+         model).
+      3. ``ctx.llm_id`` (the chunking task's default).
+    """
+    if isinstance(parser_cfg, dict):
+        tid = parser_cfg.get("llm_id")
+        if isinstance(tid, str) and tid.strip():
+            return tid.strip()
+    doc_cfg = getattr(ctx, "parser_config", None) or {}
+    if isinstance(doc_cfg, dict):
+        did = doc_cfg.get("llm_id")
+        if isinstance(did, str) and did.strip():
+            return did.strip()
+    return ctx.llm_id
+
+
+# Document-structure compilation tunables
+# (DOC_STRUCTURE_COMPILE_BATCH_CHUNKS, DOC_STRUCTURE_MERGE_MAX_DOCS,
+# STRUCTURE_CHAIN_CORRECTION_TIMEOUT_S) moved to
+# ``chunk_post_processor``.
+
+# Wiki / artifact tunables (``WIKI_MAP_BATCH_CHUNKS``,
+# ``WIKI_GRAPH_MAX_CHUNK_IDS_PER_NODE``, commit-title / comments
+# templates) moved to ``dataset_wiki_generator``.
+
+# The corpus → skill compilation pipeline lives in
+# ``rag.svr.task_executor_refactor.dataset_skill_generator``. Its entry
+# point is :func:`run_corpus2skill`; this handler invokes it from the
+# ``task_type == "skill"`` branch of ``run`` below.
+
+
 class TaskHandler:
     """Main task handler for document processing.
 
@@ -85,32 +181,52 @@ class TaskHandler:
         self._task_context = ctx
         self._billing_hook = billing_hook
 
+    @staticmethod
+    def _is_standard_chunking_task(task_type: str) -> bool:
+        task_type = (task_type or "").lower()
+        return task_type not in {
+            "memory",
+            "raptor",
+            "graphrag",
+            "mindmap",
+            "artifact",
+            "skill",
+            "evaluation",
+            "reembedding",
+            "clone",
+        } and not task_type.startswith("dataflow")
+
     async def handle_task(self) -> None:
         try:
             await self.handle()
+        except Exception:
+            if self._is_standard_chunking_task(self._task_context.task_type):
+                abort_doc_chunking_counter(self._task_context.doc_id)
+            raise
         finally:
             task_id = self._task_context.id
             task_tenant_id = self._task_context.tenant_id
             task_dataset_id = self._task_context.kb_id
             task_doc_id = self._task_context.doc_id
             if self._task_context.has_canceled_func(task_id):
-                try:
-                    exists = await thread_pool_exec(
-                        settings.docStoreConn.index_exist,
-                        search.index_name(task_tenant_id),
-                        task_dataset_id,
-                    )
-                    if exists:
-                        ret = await thread_pool_exec(
-                            settings.docStoreConn.delete,
-                            {"doc_id": task_doc_id},
+                if self._is_standard_chunking_task(self._task_context.task_type):
+                    abort_doc_chunking_counter(task_doc_id)
+                    try:
+                        exists = await thread_pool_exec(
+                            settings.docStoreConn.index_exist,
                             search.index_name(task_tenant_id),
                             task_dataset_id,
                         )
-                        self._task_context.recording_context.save_func_return_value("docStoreConn.delete", ret)
-                except Exception as e:
-                    logging.exception(
-                        f"Remove doc({task_doc_id}) from docStore failed when task({task_id}) canceled, exception: {e}")
+                        if exists:
+                            ret = await thread_pool_exec(
+                                settings.docStoreConn.delete,
+                                {"doc_id": task_doc_id},
+                                search.index_name(task_tenant_id),
+                                task_dataset_id,
+                            )
+                            self._task_context.recording_context.save_func_return_value("docStoreConn.delete", ret)
+                    except Exception as e:
+                        logging.exception(f"Remove doc({task_doc_id}) from docStore failed when task({task_id}) canceled, exception: {e}")
 
     @timeout(60 * 60 * 3, 1)
     async def handle(self) -> None:
@@ -134,7 +250,7 @@ class TaskHandler:
             ctx.progress_cb(-1, msg="Task has been canceled.")
             return
 
-        # Language defaults to "Chinese" via TaskContext._DEFAULTS — safe to bind model directly.
+        # Language defaults to "Chinese" via TaskContext._DEFAULTS — safe to bind model directly.
         # Bind embedding model (matching original do_handle_task order: bind + init_kb before routing)
         result = await self._bind_embedding_model()
         if result is None:
@@ -154,12 +270,30 @@ class TaskHandler:
                 return
 
             # Route to appropriate handler
-            if task_type == "raptor":
-                await self._run_raptor(embedding_model, vector_size)
-            elif task_type == "graphrag":
+            if task_type == "graphrag":
                 await self._run_graphrag(embedding_model)
             elif task_type == "mindmap":
                 ctx.progress_cb(1, "place holder")
+            elif task_type == "artifact":
+                from rag.svr.task_executor_refactor.dataset_wiki_generator import (
+                    run_wiki,
+                )
+
+                await run_wiki(
+                    self._task_context,
+                    embedding_model,
+                    self._load_chunks_for_doc,
+                )
+            elif task_type == "skill":
+                from rag.svr.task_executor_refactor.dataset_skill_generator import (
+                    run_corpus2skill,
+                )
+
+                await run_corpus2skill(
+                    self._task_context,
+                    embedding_model,
+                    self._load_chunks_for_doc,
+                )
             elif task_type == "evaluation":
                 await self._run_evaluation()
             elif task_type == "reembedding":
@@ -167,8 +301,7 @@ class TaskHandler:
             elif task_type == "clone":
                 await self._run_clone()
             else:
-                await self._run_standard_chunking(embedding_model)
-
+                await self._run_standard_chunking(embedding_model, vector_size)
 
     def _init_kb(self, vector_size: int) -> None:
         """Initialize knowledge base index."""
@@ -214,18 +347,14 @@ class TaskHandler:
 
         try:
             if task_embedding_id:
-                embd_model_config = get_model_config_from_provider_instance(
-                    task_tenant_id, LLMType.EMBEDDING, task_embedding_id
-                )
+                embd_model_config = get_model_config_from_provider_instance(task_tenant_id, LLMType.EMBEDDING, task_embedding_id)
             else:
-                embd_model_config = get_tenant_default_model_by_type(
-                    task_tenant_id, LLMType.EMBEDDING
-                )
+                embd_model_config = get_tenant_default_model_by_type(task_tenant_id, LLMType.EMBEDDING)
             embedding_model = LLMBundle(task_tenant_id, embd_model_config, lang=task_language)
             vts, _ = embedding_model.encode(["ok"])
             return embedding_model, len(vts[0])
         except Exception as e:
-            error_message = f'Fail to bind embedding model: {str(e)}'
+            error_message = f"Fail to bind embedding model: {str(e)}"
             ctx.progress_cb(-1, msg=error_message)
             logging.exception(error_message)
             raise
@@ -234,6 +363,7 @@ class TaskHandler:
         self,
         embedding_model: LLMBundle,
         vector_size: int,
+        mark_done: bool = True,
     ) -> None:
         """Run RAPTOR summary generation."""
         ctx = self._task_context
@@ -248,19 +378,21 @@ class TaskHandler:
 
         kb_parser_config = kb.parser_config
         if not kb_parser_config.get("raptor", {}).get("use_raptor", False):
-            kb_parser_config.update({
-                "raptor": {
-                    "use_raptor": True,
-                    "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n      {cluster_content}\nThe above is the content you need to summarize.",
-                    "max_token": 256,
-                    "threshold": 0.1,
-                    "max_cluster": 64,
-                    "random_seed": 0,
-                    "scope": "file",
-                    "clustering_method": "gmm",
-                    "tree_builder": "raptor",
-                },
-            })
+            kb_parser_config.update(
+                {
+                    "raptor": {
+                        "use_raptor": True,
+                        "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n      {cluster_content}\nThe above is the content you need to summarize.",
+                        "max_token": 256,
+                        "threshold": 0.1,
+                        "max_cluster": 64,
+                        "random_seed": 0,
+                        "scope": "file",
+                        "clustering_method": "gmm",
+                        "tree_builder": "raptor",
+                    },
+                }
+            )
             if ctx.write_interceptor:
                 update_result = ctx.write_interceptor.intercept("KnowledgebaseService.update_by_id")
             else:
@@ -271,11 +403,8 @@ class TaskHandler:
                 return
 
         # Bind LLM for raptor
-        chat_model_config = get_model_config_from_provider_instance(
-            task_tenant_id, LLMType.CHAT, kb_task_llm_id
-        )
+        chat_model_config = get_model_config_from_provider_instance(task_tenant_id, LLMType.CHAT, kb_task_llm_id)
         with LLMBundle(task_tenant_id, chat_model_config, lang=ctx.language) as chat_model:
-
             # Run RAPTOR
             raptor_service = RaptorService(ctx=ctx)
 
@@ -285,7 +414,7 @@ class TaskHandler:
                     chat_mdl=chat_model,
                     embd_mdl=embedding_model,
                     vector_size=vector_size,
-                    doc_ids=ctx.doc_ids,
+                    doc_ids=ctx.doc_ids or [ctx.doc_id],
                 )
 
             ctx.recording_context.record("raptor_chunks", chunks)
@@ -293,7 +422,7 @@ class TaskHandler:
 
             # Insert RAPTOR chunks
             if chunks:
-                task_doc_id = (ctx.doc_ids or [GRAPH_RAPTOR_FAKE_DOC_ID])[0]
+                task_doc_id = (ctx.doc_ids or ([ctx.doc_id] if ctx.doc_id else [GRAPH_RAPTOR_FAKE_DOC_ID]))[0]  # a one-element list is always truthy, so test doc_id itself
                 chunk_service = ChunkService(ctx=ctx)
                 insert_result = await chunk_service.insert_chunks(ctx.id, task_tenant_id, task_dataset_id, chunks)
                 if insert_result:
@@ -304,27 +433,43 @@ class TaskHandler:
                 # Cleanup stale RAPTOR chunks
                 cleaned_chunks = 0
                 for cleanup_doc_id, keep_method in raptor_cleanup_chunks:
-                    ret = await self._delete_raptor_chunks(
-                        cleanup_doc_id, task_tenant_id, task_dataset_id, keep_method
-                    )
+                    ret = await self._delete_raptor_chunks(cleanup_doc_id, task_tenant_id, task_dataset_id, keep_method)
                     cleaned_chunks += ret
 
                 if cleaned_chunks:
                     ctx.progress_cb(msg=f"Cleaned up {cleaned_chunks} stale RAPTOR chunks.")
 
+                # Build the per-doc RAPTOR tree graph from the just-
+                # inserted summaries. Each chunk in ``chunks`` carries
+                # the doc_id it was written under (real doc id for
+                # scope="file"; GRAPH_RAPTOR_FAKE_DOC_ID for the
+                # dataset-scope path). We materialize one graph row per
+                # distinct doc_id so the dataset structure-graph
+                # endpoint can surface a RAPTOR tab per document.
+                # Failure here is best-effort — the summaries are
+                # already persisted; the tab just won't render.
+                raptor_doc_ids = {str(c.get("doc_id")) for c in chunks if c.get("doc_id")}
+                for raptor_doc_id in raptor_doc_ids:
+                    try:
+                        await raptor_service._persist_raptor_graph_to_es(raptor_doc_id)
+                    except Exception:
+                        logging.exception(
+                            "raptor_graph: build failed for kb=%s doc=%s",
+                            task_dataset_id,
+                            raptor_doc_id,
+                        )
+
                 # Update document stats
                 if ctx.write_interceptor:
                     ctx.write_interceptor.intercept("DocumentService.increment_chunk_num")
                 else:
                     DocumentService.increment_chunk_num(task_doc_id, task_dataset_id, token_count, len(chunks), 0)
 
-            ctx.recording_context.record("task_status", "completed")
-            ctx.progress_cb(prog=1.0, msg="RAPTOR done")
+            if mark_done:
+                ctx.recording_context.record("task_status", "completed")
+                ctx.progress_cb(prog=1.0, msg="RAPTOR done")
 
-    async def _run_graphrag(
-        self,
-        embedding_model: LLMBundle
-    ) -> None:
+    async def _run_graphrag(self, embedding_model: LLMBundle) -> None:
         """Run GraphRAG."""
         ctx = self._task_context
         task_tenant_id = ctx.tenant_id
@@ -339,29 +484,31 @@ class TaskHandler:
 
         kb_parser_config = kb.parser_config
         if not kb_parser_config.get("graphrag", {}).get("use_graphrag", False):
-            kb_parser_config.update({
-                "graphrag": {
-                    "use_graphrag": True,
-                    "entity_types": [
-                        "organization",
-                        "person",
-                        "geo",
-                        "event",
-                        "category",
-                    ],
-                    "method": "light",
-                    "batch_chunk_token_size": 4096,
-                    "retry_attempts": 2,
-                    "retry_backoff_seconds": 2.0,
-                    "retry_backoff_max_seconds": 60.0,
-                    "build_subgraph_timeout_per_chunk_seconds": 300,
-                    "build_subgraph_min_timeout_seconds": 600,
-                    "merge_timeout_seconds": 180,
-                    "resolution_timeout_seconds": 1800,
-                    "community_timeout_seconds": 1800,
-                    "lock_acquire_timeout_seconds": 600,
+            kb_parser_config.update(
+                {
+                    "graphrag": {
+                        "use_graphrag": True,
+                        "entity_types": [
+                            "organization",
+                            "person",
+                            "geo",
+                            "event",
+                            "category",
+                        ],
+                        "method": "light",
+                        "batch_chunk_token_size": 4096,
+                        "retry_attempts": 2,
+                        "retry_backoff_seconds": 2.0,
+                        "retry_backoff_max_seconds": 60.0,
+                        "build_subgraph_timeout_per_chunk_seconds": 300,
+                        "build_subgraph_min_timeout_seconds": 600,
+                        "merge_timeout_seconds": 180,
+                        "resolution_timeout_seconds": 1800,
+                        "community_timeout_seconds": 1800,
+                        "lock_acquire_timeout_seconds": 600,
+                    }
                 }
-            })
+            )
             if ctx.write_interceptor:
                 update_result = ctx.write_interceptor.intercept("KnowledgebaseService.update_by_id")
             else:
@@ -372,11 +519,8 @@ class TaskHandler:
 
         graphrag_conf = kb_parser_config.get("graphrag", {})
         start_ts = timer()
-        chat_model_config = get_model_config_from_provider_instance(
-            task_tenant_id, LLMType.CHAT, kb_task_llm_id
-        )
+        chat_model_config = get_model_config_from_provider_instance(task_tenant_id, LLMType.CHAT, kb_task_llm_id)
         with LLMBundle(task_tenant_id, chat_model_config, lang=task_language) as chat_model:
-
             with_resolution = graphrag_conf.get("resolution", False)
             with_community = graphrag_conf.get("community", False)
 
@@ -399,7 +543,20 @@ class TaskHandler:
 
     async def _run_standard_chunking(
         self,
-        embedding_model: LLMBundle
+        embedding_model: LLMBundle,
+        vector_size: int,
+    ) -> None:
+        ctx = self._task_context
+        try:
+            await self._run_standard_chunking_impl(embedding_model, vector_size)  # the actual chunking pipeline
+        except Exception:  # asyncio.CancelledError derives from BaseException (Python 3.8+), so it bypasses this handler
+            abort_doc_chunking_counter(ctx.doc_id)  # failure path: abort the counter for this document (semantics defined outside this view)
+            raise  # re-raise unchanged so the caller still sees the original error
+
+    async def _run_standard_chunking_impl(
+        self,
+        embedding_model: LLMBundle,
+        vector_size: int,
     ) -> None:
         """Run standard chunking pipeline."""
         ctx = self._task_context
@@ -409,7 +566,7 @@ class TaskHandler:
         task_doc_id = ctx.doc_id
         task_start_ts = timer()
         doc_task_llm_id = ctx.parser_config.get("llm_id") or ctx.llm_id
-        ctx.raw_task['llm_id'] = doc_task_llm_id
+        ctx.raw_task["llm_id"] = doc_task_llm_id
 
         # Build chunks
         start_ts = timer()
@@ -419,9 +576,7 @@ class TaskHandler:
         bucket, name = File2DocumentService.get_storage_address(doc_id=ctx.doc_id)
         binary = await self._get_storage_binary(bucket, name)
         if binary is None:
-            raise FileNotFoundError(
-                f"Can not find file <{ctx.name}> from minio. Could you try it again."
-            )
+            raise FileNotFoundError(f"Can not find file <{ctx.name}> from minio. Could you try it again.")
 
         chunks = await chunk_service.build_chunks(binary)
         ctx.recording_context.record("chunks", chunks)
@@ -431,7 +586,18 @@ class TaskHandler:
         logging.info("Build document {}: {:.2f}s".format(ctx.name, timer() - start_ts))
 
         if not chunks:
-            ctx.progress_cb(1., msg=f"No chunk built from {ctx.name}")
+            ctx.progress_cb(msg=f"No chunk built from {ctx.name}")
+            if not await self._run_document_post_chunking_if_last(
+                embedding_model,
+                vector_size,
+                task_start_ts,
+                0,
+                0,
+            ):
+                return
+            task_time_cost = timer() - task_start_ts
+            ctx.recording_context.record("task_status", "completed")
+            ctx.progress_cb(prog=1.0, msg="Task done ({:.2f}s)".format(task_time_cost))
             return
 
         ctx.progress_cb(msg="Generate {} chunks".format(len(chunks)))
@@ -440,9 +606,7 @@ class TaskHandler:
         start_ts = timer()
         embedding_service = EmbeddingService(ctx=ctx)
         try:
-            token_count, vector_size = await embedding_service.embed_chunks(
-                chunks, embedding_model, ctx.parser_config
-            )
+            token_count, vector_size = await embedding_service.embed_chunks(chunks, embedding_model, ctx.parser_config)
         except TaskCanceledException:
             raise
         except Exception as e:
@@ -457,11 +621,6 @@ class TaskHandler:
         logging.info(progress_message)
         ctx.progress_cb(msg=progress_message)
 
-        # Build TOC if needed
-        toc_thread = None
-        if ctx.parser_id.lower() == "naive" and ctx.parser_config.get("toc_extraction", False):
-            toc_thread = asyncio.create_task(asyncio.to_thread(self._build_toc, ctx, chunks, ctx.progress_cb))
-
         # Insert chunks
         chunk_count = len(set([chunk["id"] for chunk in chunks]))
         start_ts = timer()
@@ -469,15 +628,15 @@ class TaskHandler:
         chunk_service = ChunkService(ctx=ctx)
 
         if ctx.has_canceled_func(task_id):
+            abort_doc_chunking_counter(task_doc_id)
             ctx.progress_cb(-1, msg="Task has been canceled.")
             return
 
-        insert_result = await chunk_service.insert_chunks(
-            task_id, task_tenant_id, task_dataset_id, chunks
-        )
+        insert_result = await chunk_service.insert_chunks(task_id, task_tenant_id, task_dataset_id, chunks)
 
         if not insert_result:
             ctx.recording_context.record("insertion_result", "failed")
+            abort_doc_chunking_counter(task_doc_id)
             return
         ctx.recording_context.record("insertion_result", "success")
 
@@ -487,12 +646,8 @@ class TaskHandler:
 
         ctx.progress_cb(msg="Indexing done ({:.2f}s).".format(timer() - start_ts))
 
-        toc_chunk = await self._process_toc_thread(toc_thread)
-        if toc_chunk:
-            ctx.recording_context.record("toc_chunk", [toc_chunk])
-            await post_processor.insert_toc_chunk(toc_chunk, chunk_service)
-
         if ctx.has_canceled_func(task_id):
+            abort_doc_chunking_counter(task_doc_id)
             ctx.progress_cb(-1, msg="Task has been canceled.")
             return
 
@@ -502,17 +657,44 @@ class TaskHandler:
         else:
             DocumentService.increment_chunk_num(task_doc_id, task_dataset_id, token_count, chunk_count, 0)
 
+        if not await self._run_document_post_chunking_if_last(
+            embedding_model,
+            vector_size,
+            task_start_ts,
+            len(chunks),
+            token_count,
+        ):
+            return
+
         task_time_cost = timer() - task_start_ts
         ctx.recording_context.record("task_status", "completed")
         ctx.progress_cb(prog=1.0, msg="Task done ({:.2f}s)".format(task_time_cost))
 
-        logging.info(
-            "Chunk doc({}), page({}-{}), chunks({}), token({}), elapsed:{:.2f}".format(
-                ctx.name, ctx.from_page, ctx.to_page,
-                len(chunks), token_count, task_time_cost
-            )
+        logging.info("Chunk doc({}), page({}-{}), chunks({}), token({}), elapsed:{:.2f}".format(ctx.name, ctx.from_page, ctx.to_page, len(chunks), token_count, task_time_cost))
+
+    async def _run_document_post_chunking_if_last(
+        self,
+        embedding_model: LLMBundle,
+        vector_size: int,
+        task_start_ts: float,
+        chunks_len: int,
+        token_count: int,
+    ) -> bool:
+        """Delegate to ``rag.svr.task_executor_refactor.chunk_post_processor.run_document_post_chunking_if_last``
+        and return its result unchanged. Callers in this class return early, without reporting
+        "Task done", when that result is falsy; every argument is forwarded positionally as received."""
+        from rag.svr.task_executor_refactor.chunk_post_processor import (  # function-scope import; possibly avoids a circular import — TODO confirm
+            run_document_post_chunking_if_last,
         )
+
+        return await run_document_post_chunking_if_last(
+            self,  # the handler instance is passed as the first positional argument
+            embedding_model,
+            vector_size,
+            task_start_ts,
+            chunks_len,
+            token_count,
+        )
 
     async def _process_toc_thread(self, toc_thread):
         try:
@@ -527,30 +709,105 @@ class TaskHandler:
     @classmethod
     async def _get_storage_binary(cls, bucket: str, name: str) -> bytes:
         from common import settings
+        # NOTE(review): the string below follows the import, so it is a no-op expression statement, not this method's docstring.
         """Get binary from storage."""
         return await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, name)
 
+    @staticmethod
+    async def _load_chunks_for_doc(
+        tenant_id: str,
+        kb_id: str,
+        doc_id: str,
+        batch_size: int = 500,
+    ) -> AsyncIterator[List[Dict]]:
+        """Stream a document's chunks from the doc store one batch at a time.
+
+        Async generator yielding lists of at most ``batch_size`` dicts with keys
+        ``id``, ``doc_id``, ``content_with_weight``, ``page_num_int`` and ``top_int``.
+        Only rows matching ``doc_id`` with ``available_int`` 1 are requested, ordered
+        ascending by ``page_num_int`` then ``top_int`` by the doc store itself.
+        Rows carrying a truthy ``compile_kwd`` are skipped (see the NOTE on that filter).
+
+        Yields nothing when the tenant index does not exist for ``kb_id``. A failed
+        search is logged and ends the stream, so callers may receive a partial result.
+        """
+        from common.doc_store.doc_store_base import OrderByExpr
+
+        index_nm = search.index_name(tenant_id)
+        if not settings.docStoreConn.index_exist(index_nm, kb_id):
+            return  # bare return in an async generator: the stream is simply empty
+
+        select_fields = [  # NOTE(review): "compile_kwd" is not requested here although it is tested below
+            "id",
+            "doc_id",
+            "content_with_weight",
+            "page_num_int",
+            "top_int",
+        ]
+        order_by = OrderByExpr()
+        order_by.asc("page_num_int")
+        order_by.asc("top_int")
+
+        offset = 0
+        while True:
+            try:
+                res = await thread_pool_exec(  # run the blocking search off the event loop
+                    settings.docStoreConn.search,
+                    select_fields,
+                    [],  # highlight_fields, going by ESConnection.search's parameter order
+                    {"doc_id": [doc_id], "available_int": 1},
+                    [],  # match_expressions: none, this is a pure filter + sort query
+                    order_by,
+                    offset,
+                    batch_size,
+                    index_nm,
+                    [kb_id],
+                )
+                field_map = settings.docStoreConn.get_fields(res, select_fields)  # consumed below as {row_id: {field: value}}
+            except Exception:  # best effort: log and stop instead of propagating to the consumer
+                logging.exception("load_chunks_for_doc: failed to load chunks for doc=%s", doc_id)
+                return
+            if not field_map:  # empty page: nothing left to read
+                return
+
+            batch: List[Dict] = []  # only the current page is held in memory
+            for row_id, row in field_map.items():
+                if row.get("compile_kwd"):  # NOTE(review): if get_fields() returns only select_fields this never fires — confirm
+                    continue
+                batch.append(
+                    {
+                        "id": row_id,
+                        "doc_id": row.get("doc_id") or doc_id,
+                        "content_with_weight": row.get("content_with_weight") or "",
+                        "page_num_int": row.get("page_num_int", 0),
+                        "top_int": row.get("top_int", 0),
+                    }
+                )
+            if batch:  # a page made only of skipped rows yields nothing, but paging continues
+                yield batch
+            if len(field_map) < batch_size:  # short page means last page; uses the unfiltered row count
+                return
+            offset += batch_size
+
     @classmethod
     def _build_toc(cls, ctx: TaskContext, docs: List[Dict], progress_cb: Callable) -> Optional[Dict]:
         """Build table of contents."""
         progress_cb(msg="Start to generate table of content ...")
-        chat_model_config = get_model_config_from_provider_instance(
-            ctx.tenant_id, LLMType.CHAT, ctx.llm_id
-        )
+        chat_model_config = get_model_config_from_provider_instance(ctx.tenant_id, LLMType.CHAT, ctx.llm_id)
         with LLMBundle(ctx.tenant_id, chat_model_config, lang=ctx.language) as chat_mdl:
-
-            docs = sorted(docs, key=lambda d: (
-                d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
-                d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0)
-            ))
+            docs = sorted(
+                docs,
+                key=lambda d: (
+                    d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
+                    d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0),
+                ),
+            )
 
             # NOTE: asyncio.run() creates a new event loop in the worker thread
             # (this method is called via asyncio.to_thread), which is the
             # intended pattern for bridging sync -> async in a thread context.
-            toc: list[dict] = asyncio.run(
-                run_toc_from_text([d["content_with_weight"] for d in docs], chat_mdl, progress_cb)
-            )
-            logging.info("------------ T O C -------------\n" + json.dumps(toc, ensure_ascii=False, indent='  '))
+            toc: list[dict] = asyncio.run(run_toc_from_text([d["content_with_weight"] for d in docs], chat_mdl, progress_cb))
+            logging.info("------------ T O C -------------\n" + json.dumps(toc, ensure_ascii=False, indent="  "))
 
             for ii, item in enumerate(toc):
                 try:
@@ -578,19 +835,17 @@ class TaskHandler:
 
             if toc:
                 import copy
+
                 d = copy.deepcopy(docs[-1])
                 d["content_with_weight"] = json.dumps(toc, ensure_ascii=False)
                 d["toc_kwd"] = "toc"
                 d["available_int"] = 0
                 d["page_num_int"] = [100000000]
-                d["id"] = xxhash.xxh64(
-                    (d["content_with_weight"] + str(d["doc_id"])).encode("utf-8", "surrogatepass")).hexdigest()
+                d["id"] = xxhash.xxh64((d["content_with_weight"] + str(d["doc_id"])).encode("utf-8", "surrogatepass")).hexdigest()
                 return d
             return None
 
-    async def _delete_raptor_chunks(
-        self, doc_id: str, tenant_id: str, kb_id: str, keep_method: Optional[str]
-    ) -> int:
+    async def _delete_raptor_chunks(self, doc_id: str, tenant_id: str, kb_id: str, keep_method: Optional[str]) -> int:
         """Delete RAPTOR chunks."""
         if self._task_context.write_interceptor:
             return self._task_context.write_interceptor.intercept("delete_raptor_chunks")
diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py
index eed2e67c27..b69cca2570 100644
--- a/rag/utils/es_conn.py
+++ b/rag/utils/es_conn.py
@@ -65,12 +65,7 @@ class ESConnection(ESConnectionBase):
     """
 
     def _es_search_once(self, index_names: list[str], query: dict, track_total_hits: bool):
-        return self.es.search(
-            index=index_names,
-            body=query,
-            timeout="600s",
-            track_total_hits=track_total_hits
-        )
+        return self.es.search(index=index_names, body=query, timeout="600s", track_total_hits=track_total_hits)
 
     def _search_with_search_after(self, index_names: list[str], query: dict, offset: int, limit: int):
         q_base = copy.deepcopy(query)
@@ -139,17 +134,18 @@ class ESConnection(ESConnectionBase):
         return template_res
 
     def search(
-            self, select_fields: list[str],
-            highlight_fields: list[str],
-            condition: dict,
-            match_expressions: list[MatchExpr],
-            order_by: OrderByExpr,
-            offset: int,
-            limit: int,
-            index_names: str | list[str],
-            knowledgebase_ids: list[str],
-            agg_fields: list[str] | None = None,
-            rank_feature: dict | None = None
+        self,
+        select_fields: list[str],
+        highlight_fields: list[str],
+        condition: dict,
+        match_expressions: list[MatchExpr],
+        order_by: OrderByExpr,
+        offset: int,
+        limit: int,
+        index_names: str | list[str],
+        knowledgebase_ids: list[str],
+        agg_fields: list[str] | None = None,
+        rank_feature: dict | None = None,
     ):
         """
         Refers to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
@@ -166,19 +162,22 @@ class ESConnection(ESConnectionBase):
                 if v == 0:
                     bool_query.filter.append(Q("range", available_int={"lt": 1}))
                 else:
-                    bool_query.filter.append(
-                        Q("bool", must_not=Q("range", available_int={"lt": 1})))
+                    bool_query.filter.append(Q("bool", must_not=Q("range", available_int={"lt": 1})))
                 continue
             if k == "id":
                 if not v:
                     continue
                 if isinstance(v, list):
-                    bool_query.filter.append(
-                        Q("bool", should=[Q("terms", id=v), Q("terms", _id=v)], minimum_should_match=1))
+                    bool_query.filter.append(Q("bool", should=[Q("terms", id=v), Q("terms", _id=v)], minimum_should_match=1))
                 elif isinstance(v, str) or isinstance(v, int):
-                    bool_query.filter.append(
-                        Q("bool", should=[Q("term", id=v), Q("term", _id=v)], minimum_should_match=1))
+                    bool_query.filter.append(Q("bool", should=[Q("term", id=v), Q("term", _id=v)], minimum_should_match=1))
                 continue
+            if k == "must_not":  # negative conditions, e.g. {"must_not": {"exists": "<field>"}}
+                if isinstance(v, dict):
+                    for kk, vv in v.items():
+                        if kk == "exists":  # only "exists" is translated; any other key is silently ignored
+                            bool_query.must_not.append(Q("exists", field=vv))
+                    continue  # NOTE(review): nested under isinstance(), so a non-dict must_not value falls through to the generic handling below
             if not v:
                 continue
             if isinstance(v, list):
@@ -186,17 +185,18 @@ class ESConnection(ESConnectionBase):
             elif isinstance(v, str) or isinstance(v, int):
                 bool_query.filter.append(Q("term", **{k: v}))
             else:
-                raise Exception(
-                    f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
+                raise Exception(f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
 
         s = Search()
         vector_similarity_weight = 0.5
         for m in match_expressions:
             if isinstance(m, FusionExpr) and m.method == "weighted_sum" and "weights" in m.fusion_params:
-                assert len(match_expressions) == 3 and isinstance(match_expressions[0], MatchTextExpr) and isinstance(
-                    match_expressions[1],
-                    MatchDenseExpr) and isinstance(
-                    match_expressions[2], FusionExpr)
+                assert (
+                    len(match_expressions) == 3
+                    and isinstance(match_expressions[0], MatchTextExpr)
+                    and isinstance(match_expressions[1], MatchDenseExpr)
+                    and isinstance(match_expressions[2], FusionExpr)
+                )
                 weights = m.fusion_params["weights"]
                 vector_similarity_weight = get_float(weights.split(",")[1])
         for m in match_expressions:
@@ -204,24 +204,22 @@ class ESConnection(ESConnectionBase):
                 minimum_should_match = m.extra_options.get("minimum_should_match", 0.0)
                 if isinstance(minimum_should_match, float):
                     minimum_should_match = str(int(minimum_should_match * 100)) + "%"
-                bool_query.must.append(Q("query_string", fields=m.fields,
-                                         type="best_fields", query=m.matching_text,
-                                         minimum_should_match=minimum_should_match,
-                                         boost=1))
+                bool_query.must.append(Q("query_string", fields=m.fields, type="best_fields", query=m.matching_text, minimum_should_match=minimum_should_match, boost=1))
                 bool_query.boost = 1.0 - vector_similarity_weight
 
             elif isinstance(m, MatchDenseExpr):
-                assert (bool_query is not None)
+                assert bool_query is not None
                 similarity = 0.0
                 if "similarity" in m.extra_options:
                     similarity = m.extra_options["similarity"]
-                s = s.knn(m.vector_column_name,
-                          m.topn,
-                          m.topn * 2,
-                          query_vector=list(m.embedding_data),
-                          filter=bool_query.to_dict(),
-                          similarity=similarity,
-                          )
+                s = s.knn(
+                    m.vector_column_name,
+                    m.topn,
+                    m.topn * 2,
+                    query_vector=list(m.embedding_data),
+                    filter=bool_query.to_dict(),
+                    similarity=similarity,
+                )
 
         if bool_query and rank_feature:
             for fld, sc in rank_feature.items():
@@ -239,31 +237,25 @@ class ESConnection(ESConnectionBase):
             for field, order in order_by.fields:
                 order = "asc" if order == 0 else "desc"
                 if field in ["page_num_int", "top_int"]:
-                    order_info = {"order": order, "unmapped_type": "float",
-                                  "mode": "avg", "numeric_type": "double"}
+                    order_info = {"order": order, "unmapped_type": "float", "mode": "avg", "numeric_type": "double"}
                 elif field.endswith("_int") or field.endswith("_flt"):
                     order_info = {"order": order, "unmapped_type": "float"}
                 elif field == "id":
-                    continue # id as "text", not a "keyword", order by it will cause error
+                    continue  # id as "text", not a "keyword", order by it will cause error
                 else:
                     order_info = {"order": order, "unmapped_type": "keyword"}
                 orders.append({field: order_info})
             s = s.sort(*orders)
         if agg_fields:
             for fld in agg_fields:
-                s.aggs.bucket(f'aggs_{fld}', 'terms', field=fld, size=1000000)
+                s.aggs.bucket(f"aggs_{fld}", "terms", field=fld, size=1000000)
 
         has_dense = any(isinstance(m, MatchDenseExpr) for m in match_expressions)
         has_explicit_sort = bool(order_by and order_by.fields)
-        use_search_after = (
-            limit > 0
-            and (offset + limit > MAX_RESULT_WINDOW)
-            and has_explicit_sort
-            and not has_dense
-        )
+        use_search_after = limit > 0 and (offset + limit > MAX_RESULT_WINDOW) and has_explicit_sort and not has_dense
 
         if limit > 0 and not use_search_after:
-            s = s[offset:offset + limit]
+            s = s[offset : offset + limit]
         # Filter _source to only requested fields for efficiency, and add vector
         # fields to "fields" param so they appear in hit.fields when ES 9.x
         # exclude_source_vectors is enabled (dense_vector not in _source).
@@ -295,7 +287,7 @@ class ESConnection(ESConnectionBase):
                 continue
             except Exception as e:
                 # Only log debug for NotFoundError(accepted when metadata index doesn't exist)
-                if 'NotFound' in str(e):
+                if "NotFound" in str(e):
                     self.logger.debug(f"ESConnection.search {str(index_names)} query: " + str(q) + " - " + str(e))
                 else:
                     self.logger.exception(f"ESConnection.search {str(index_names)} query: " + str(q) + str(e))
@@ -314,16 +306,14 @@ class ESConnection(ESConnectionBase):
             d_copy["kb_id"] = knowledgebase_id
             # Use id as _id for uniqueness, also keep "id" as a regular field for sorting
             meta_id = d_copy.get("id", "")
-            operations.append(
-                {"index": {"_index": index_name, "_id": meta_id}})
+            operations.append({"index": {"_index": index_name, "_id": meta_id}})
             operations.append(d_copy)
 
         res = []
         for _ in range(ATTEMPT_TIME):
             try:
                 res = []
-                r = self.es.bulk(index=index_name, operations=operations,
-                                 refresh="wait_for", timeout="60s")
+                r = self.es.bulk(index=index_name, operations=operations, refresh="wait_for", timeout="60s")
                 if re.search(r"False", str(r["errors"]), re.IGNORECASE):
                     return res
 
@@ -359,10 +349,9 @@ class ESConnection(ESConnectionBase):
                     if "feas" != k.split("_")[-1]:
                         continue
                     try:
-                        self.es.update(index=index_name, id=chunk_id, script=f"ctx._source.remove(\"{k}\");")
+                        self.es.update(index=index_name, id=chunk_id, script=f'ctx._source.remove("{k}");')
                     except Exception:
-                        self.logger.exception(
-                            f"ESConnection.update(index={index_name}, id={chunk_id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception")
+                        self.logger.exception(f"ESConnection.update(index={index_name}, id={chunk_id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception")
                 try:
                     if remove_field is not None:
                         self.es.update(
@@ -375,9 +364,7 @@ class ESConnection(ESConnectionBase):
                         params = {}
                         for kk, vv in remove_dict.items():
                             scripts.append(
-                                f"if (ctx._source.containsKey('{kk}') && ctx._source.{kk} != null) "
-                                f"{{ int i = ctx._source.{kk}.indexOf(params.p_{kk}); "
-                                f"if (i >= 0) {{ ctx._source.{kk}.remove(i); }} }}"
+                                f"if (ctx._source.containsKey('{kk}') && ctx._source.{kk} != null) {{ int i = ctx._source.{kk}.indexOf(params.p_{kk}); if (i >= 0) {{ ctx._source.{kk}.remove(i); }} }}"
                             )
                             params[f"p_{kk}"] = vv
                         if scripts:
@@ -391,9 +378,7 @@ class ESConnection(ESConnectionBase):
                     if remove_field is not None or remove_dict is not None or doc_part:
                         return True
                 except Exception as e:
-                    self.logger.exception(
-                        f"ESConnection.update(index={index_name}, id={chunk_id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception: " + str(
-                            e))
+                    self.logger.exception(f"ESConnection.update(index={index_name}, id={chunk_id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception: " + str(e))
                     break
             return False
 
@@ -405,13 +390,18 @@ class ESConnection(ESConnectionBase):
             if k == "exists":
                 bool_query.filter.append(Q("exists", field=v))
                 continue
+            if k == "must_not":  # negative conditions, e.g. {"must_not": {"exists": "<field>"}}
+                if isinstance(v, dict):
+                    for kk, vv in v.items():
+                        if kk == "exists":  # only "exists" is translated; any other key is silently ignored
+                            bool_query.must_not.append(Q("exists", field=vv))
+                    continue  # NOTE(review): nested under isinstance(), so a non-dict must_not value falls through to the term/terms/raise branches below
             if isinstance(v, list):
                 bool_query.filter.append(Q("terms", **{k: v}))
             elif isinstance(v, str) or isinstance(v, int):
                 bool_query.filter.append(Q("term", **{k: v}))
             else:
-                raise Exception(
-                    f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
+                raise Exception(f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
         scripts = []
         params = {}
         for k, v in new_value.items():
@@ -441,11 +431,8 @@ class ESConnection(ESConnectionBase):
                 scripts.append(f"ctx._source.{k}=params.pp_{k};")
                 params[f"pp_{k}"] = json.dumps(v, ensure_ascii=False)
             else:
-                raise Exception(
-                    f"newValue `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str.")
-        ubq = UpdateByQuery(
-            index=index_name).using(
-            self.es).query(bool_query)
+                raise Exception(f"newValue `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str.")
+        ubq = UpdateByQuery(index=index_name).using(self.es).query(bool_query)
         ubq = ubq.script(source="".join(scripts), params=params)
         ubq = ubq.params(refresh=True)
         ubq = ubq.params(slices=5)
@@ -563,10 +550,7 @@ class ESConnection(ESConnectionBase):
         self.logger.debug("ESConnection.delete query: " + json.dumps(qry.to_dict()))
         for _ in range(ATTEMPT_TIME):
             try:
-                res = self.es.delete_by_query(
-                    index=index_name,
-                    body=Search().query(qry).to_dict(),
-                    refresh=True)
+                res = self.es.delete_by_query(index=index_name, body=Search().query(qry).to_dict(), refresh=True)
                 return res["deleted"]
             except ConnectionTimeout:
                 self.logger.exception("ES request timeout")
diff --git a/rag/utils/opensearch_conn.py b/rag/utils/opensearch_conn.py
index c2ef36ae6d..97d510308f 100644
--- a/rag/utils/opensearch_conn.py
+++ b/rag/utils/opensearch_conn.py
@@ -26,8 +26,7 @@ from opensearchpy import UpdateByQuery, Q, Search, Index
 from opensearchpy import ConnectionTimeout
 from common.decorator import singleton
 from common.file_utils import get_project_base_directory
-from common.doc_store.doc_store_base import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, \
-    FusionExpr
+from common.doc_store.doc_store_base import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, FusionExpr
 from rag.nlp import is_english, rag_tokenizer
 from common.constants import PAGERANK_FLD, TAG_FLD
 from common import settings
@@ -58,7 +57,7 @@ if (nw <= 0.0) {
 }
 """
 
-logger = logging.getLogger('ragflow.opensearch_conn')
+logger = logging.getLogger("ragflow.opensearch_conn")
 
 
 @singleton
@@ -70,10 +69,9 @@ class OSConnection(DocStoreConnection):
             try:
                 self.os = OpenSearch(
                     settings.OS["hosts"].split(","),
-                    http_auth=(settings.OS["username"], settings.OS[
-                        "password"]) if "username" in settings.OS and "password" in settings.OS else None,
+                    http_auth=(settings.OS["username"], settings.OS["password"]) if "username" in settings.OS and "password" in settings.OS else None,
                     verify_certs=False,
-                    timeout=600
+                    timeout=600,
                 )
                 if self.os:
                     self.info = self.os.info()
@@ -118,8 +116,7 @@ class OSConnection(DocStoreConnection):
         warning, leave it off, and search() keeps doing vector-only.
         """
         self.hybrid_search_enabled = False
-        self._hybrid_pipeline = os.environ.get("OS_HYBRID_PIPELINE") \
-            or settings.OS.get("hybrid_search_pipeline") or "ragflow_hybrid_pipeline"
+        self._hybrid_pipeline = os.environ.get("OS_HYBRID_PIPELINE") or settings.OS.get("hybrid_search_pipeline") or "ragflow_hybrid_pipeline"
 
         version_number = self.info.get("version", {}).get("number", "")
         try:
@@ -127,34 +124,32 @@ class OSConnection(DocStoreConnection):
         except (ValueError, AttributeError):
             version = (0, 0)
         if version < self.HYBRID_MIN_VERSION:
-            logger.warning(f"OpenSearch {version_number or 'unknown'} does not support the "
-                           f"normalization-processor (requires >= {self.HYBRID_MIN_VERSION[0]}."
-                           f"{self.HYBRID_MIN_VERSION[1]}); hybrid search is disabled and "
-                           f"queries fall back to vector-only.")
+            logger.warning(
+                f"OpenSearch {version_number or 'unknown'} does not support the "
+                f"normalization-processor (requires >= {self.HYBRID_MIN_VERSION[0]}."
+                f"{self.HYBRID_MIN_VERSION[1]}); hybrid search is disabled and "
+                f"queries fall back to vector-only."
+            )
             return
 
         weights = settings.OS.get("hybrid_search_weights", [0.5, 0.5])
         pipeline_body = {
             "description": "RAGFlow hybrid search normalization pipeline (BM25 + KNN).",
-            "phase_results_processors": [
-                {"normalization-processor": {
-                    "normalization": {"technique": "min_max"},
-                    "combination": {"technique": "arithmetic_mean",
-                                    "parameters": {"weights": weights}}}}
-            ],
+            "phase_results_processors": [{"normalization-processor": {"normalization": {"technique": "min_max"}, "combination": {"technique": "arithmetic_mean", "parameters": {"weights": weights}}}}],
         }
         try:
-            self.os.transport.perform_request(
-                "PUT", f"/_search/pipeline/{self._hybrid_pipeline}", body=pipeline_body)
+            self.os.transport.perform_request("PUT", f"/_search/pipeline/{self._hybrid_pipeline}", body=pipeline_body)
             self.hybrid_search_enabled = True
-            logger.info(f"OpenSearch hybrid search enabled via pipeline "
-                        f"'{self._hybrid_pipeline}' (weights {weights}).")
+            logger.info(f"OpenSearch hybrid search enabled via pipeline '{self._hybrid_pipeline}' (weights {weights}).")
         except Exception:
-            logger.warning(f"Could not create OpenSearch search pipeline '{self._hybrid_pipeline}'; "
-                           f"hybrid search is disabled and queries fall back to vector-only. "
-                           f"Creating a search pipeline needs the "
-                           f"'cluster:admin/search/pipeline/put' privilege (relevant on "
-                           f"locked-down or managed OpenSearch).", exc_info=True)
+            logger.warning(
+                f"Could not create OpenSearch search pipeline '{self._hybrid_pipeline}'; "
+                f"hybrid search is disabled and queries fall back to vector-only. "
+                f"Creating a search pipeline needs the "
+                f"'cluster:admin/search/pipeline/put' privilege (relevant on "
+                f"locked-down or managed OpenSearch).",
+                exc_info=True,
+            )
 
     """
     Database operations
@@ -177,8 +172,8 @@ class OSConnection(DocStoreConnection):
             return True
         try:
             from opensearchpy.client import IndicesClient
-            return IndicesClient(self.os).create(index=indexName,
-                                                 body=self.mapping)
+
+            return IndicesClient(self.os).create(index=indexName, body=self.mapping)
         except Exception:
             logger.exception("OSConnection.createIndex error %s" % (indexName))
 
@@ -215,6 +210,7 @@ class OSConnection(DocStoreConnection):
                 mappings = {**mappings, "dynamic": True}
 
             from opensearchpy.client import IndicesClient
+
             body = {
                 "settings": doc_meta_mapping["settings"],
                 "mappings": mappings,
@@ -316,17 +312,18 @@ class OSConnection(DocStoreConnection):
     """
 
     def search(
-            self, select_fields: list[str],
-            highlight_fields: list[str],
-            condition: dict,
-            match_expressions: list[MatchExpr],
-            order_by: OrderByExpr,
-            offset: int,
-            limit: int,
-            index_names: str | list[str],
-            knowledgebase_ids: list[str],
-            agg_fields: list[str] = [],
-            rank_feature: dict | None = None
+        self,
+        select_fields: list[str],
+        highlight_fields: list[str],
+        condition: dict,
+        match_expressions: list[MatchExpr],
+        order_by: OrderByExpr,
+        offset: int,
+        limit: int,
+        index_names: str | list[str],
+        knowledgebase_ids: list[str],
+        agg_fields: list[str] = [],
+        rank_feature: dict | None = None,
     ):
         """
         Refers to https://github.com/opensearch-project/opensearch-py/blob/main/guides/dsl.md
@@ -345,9 +342,22 @@ class OSConnection(DocStoreConnection):
                 if v == 0:
                     bqry.filter.append(Q("range", available_int={"lt": 1}))
                 else:
-                    bqry.filter.append(
-                        Q("bool", must_not=Q("range", available_int={"lt": 1})))
+                    bqry.filter.append(Q("bool", must_not=Q("range", available_int={"lt": 1})))
                 continue
+            if k == "id":
+                if not v:
+                    continue
+                if isinstance(v, list):
+                    bqry.filter.append(Q("bool", should=[Q("terms", id=v), Q("ids", values=v)], minimum_should_match=1))
+                elif isinstance(v, str) or isinstance(v, int):
+                    bqry.filter.append(Q("bool", should=[Q("term", id=v), Q("ids", values=[v])], minimum_should_match=1))
+                continue
+            if k == "must_not":
+                if isinstance(v, dict):
+                    for kk, vv in v.items():
+                        if kk == "exists":
+                            bqry.must_not.append(Q("exists", field=vv))
+                    continue
             if not v:
                 continue
             if isinstance(v, list):
@@ -355,16 +365,18 @@ class OSConnection(DocStoreConnection):
             elif isinstance(v, str) or isinstance(v, int):
                 bqry.filter.append(Q("term", **{k: v}))
             else:
-                raise Exception(
-                    f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
+                raise Exception(f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
 
         s = Search()
         vector_similarity_weight = 0.5
         for m in match_expressions:
             if isinstance(m, FusionExpr) and m.method == "weighted_sum" and "weights" in m.fusion_params:
-                assert len(match_expressions) == 3 and isinstance(match_expressions[0], MatchTextExpr) and isinstance(match_expressions[1],
-                                                                                                        MatchDenseExpr) and isinstance(
-                    match_expressions[2], FusionExpr)
+                assert (
+                    len(match_expressions) == 3
+                    and isinstance(match_expressions[0], MatchTextExpr)
+                    and isinstance(match_expressions[1], MatchDenseExpr)
+                    and isinstance(match_expressions[2], FusionExpr)
+                )
                 weights = m.fusion_params["weights"]
                 vector_similarity_weight = float(weights.split(",")[1])
         knn_query = {}
@@ -374,10 +386,7 @@ class OSConnection(DocStoreConnection):
                 minimum_should_match = m.extra_options.get("minimum_should_match", 0.0)
                 if isinstance(minimum_should_match, float):
                     minimum_should_match = str(int(minimum_should_match * 100)) + "%"
-                bqry.must.append(Q("query_string", fields=m.fields,
-                                   type="best_fields", query=m.matching_text,
-                                   minimum_should_match=minimum_should_match,
-                                   boost=1))
+                bqry.must.append(Q("query_string", fields=m.fields, type="best_fields", query=m.matching_text, minimum_should_match=minimum_should_match, boost=1))
                 bqry.boost = 1.0 - vector_similarity_weight
 
             # Elasticsearch has the encapsulation of KNN_search in python sdk
@@ -385,7 +394,7 @@ class OSConnection(DocStoreConnection):
             # the following codes implement KNN_search in OpenSearch using DSL
             # Besides, Opensearch's DSL for KNN_search query syntax differs from that in Elasticsearch, I also made some adaptions for it
             elif isinstance(m, MatchDenseExpr):
-                assert (bqry is not None)
+                assert bqry is not None
                 similarity = 0.0
                 if "similarity" in m.extra_options:
                     similarity = m.extra_options["similarity"]
@@ -419,8 +428,7 @@ class OSConnection(DocStoreConnection):
             for field, order in order_by.fields:
                 order = "asc" if order == 0 else "desc"
                 if field in ["page_num_int", "top_int"]:
-                    order_info = {"order": order, "unmapped_type": "float",
-                                  "mode": "avg", "numeric_type": "double"}
+                    order_info = {"order": order, "unmapped_type": "float", "mode": "avg", "numeric_type": "double"}
                 elif field.endswith("_int") or field.endswith("_flt"):
                     order_info = {"order": order, "unmapped_type": "float"}
                 else:
@@ -429,10 +437,10 @@ class OSConnection(DocStoreConnection):
             s = s.sort(*orders)
 
         for fld in agg_fields:
-            s.aggs.bucket(f'aggs_{fld}', 'terms', field=fld, size=1000000)
+            s.aggs.bucket(f"aggs_{fld}", "terms", field=fld, size=1000000)
 
         if limit > 0:
-            s = s[offset:offset + limit]
+            s = s[offset : offset + limit]
         q = s.to_dict()
         logger.debug(f"OSConnection.search {str(index_names)} query: " + json.dumps(q))
 
@@ -455,13 +463,15 @@ class OSConnection(DocStoreConnection):
 
         for i in range(ATTEMPT_TIME):
             try:
-                res = self.os.search(index=index_names,
-                                     body=q,
-                                     timeout=600,
-                                     # search_type="dfs_query_then_fetch",
-                                     track_total_hits=True,
-                                     _source=True,
-                                     **search_kwargs)
+                res = self.os.search(
+                    index=index_names,
+                    body=q,
+                    timeout=600,
+                    # search_type="dfs_query_then_fetch",
+                    track_total_hits=True,
+                    _source=True,
+                    **search_kwargs,
+                )
                 if str(res.get("timed_out", "")).lower() == "true":
                     raise Exception("OpenSearch Timeout.")
                 logger.debug(f"OSConnection.search {str(index_names)} res: " + str(res))
@@ -477,8 +487,11 @@ class OSConnection(DocStoreConnection):
     def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
         for i in range(ATTEMPT_TIME):
             try:
-                res = self.os.get(index=(indexName),
-                                  id=chunkId, _source=True, )
+                res = self.os.get(
+                    index=(indexName),
+                    id=chunkId,
+                    _source=True,
+                )
                 if str(res.get("timed_out", "")).lower() == "true":
                     raise Exception("Es Timeout.")
                 chunk = res["_source"]
@@ -505,16 +518,14 @@ class OSConnection(DocStoreConnection):
             # doc-meta read path (DocMetadataService filters on / sorts by the
             # "id" field) can find it, mirroring ESConnection.insert().
             meta_id = d_copy.get("id", "")
-            operations.append(
-                {"index": {"_index": indexName, "_id": meta_id}})
+            operations.append({"index": {"_index": indexName, "_id": meta_id}})
             operations.append(d_copy)
 
         res = []
         for _ in range(ATTEMPT_TIME):
             try:
                 res = []
-                r = self.os.bulk(index=(indexName), body=operations,
-                                 refresh="wait_for", timeout=60)
+                r = self.os.bulk(index=(indexName), body=operations, refresh="wait_for", timeout=60)
                 if re.search(r"False", str(r["errors"]), re.IGNORECASE):
                     return res
 
@@ -556,9 +567,7 @@ class OSConnection(DocStoreConnection):
                         params = {}
                         for kk, vv in remove_dict.items():
                             scripts.append(
-                                f"if (ctx._source.containsKey('{kk}') && ctx._source.{kk} != null) "
-                                f"{{ int i = ctx._source.{kk}.indexOf(params.p_{kk}); "
-                                f"if (i >= 0) {{ ctx._source.{kk}.remove(i); }} }}"
+                                f"if (ctx._source.containsKey('{kk}') && ctx._source.{kk} != null) {{ int i = ctx._source.{kk}.indexOf(params.p_{kk}); if (i >= 0) {{ ctx._source.{kk}.remove(i); }} }}"
                             )
                             params[f"p_{kk}"] = vv
                         if scripts:
@@ -572,8 +581,7 @@ class OSConnection(DocStoreConnection):
                     if remove_field is not None or remove_dict is not None or doc_part:
                         return True
                 except Exception as e:
-                    logger.exception(
-                        f"OSConnection.update(index={indexName}, id={id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception")
+                    logger.exception(f"OSConnection.update(index={indexName}, id={id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception")
                     if re.search(r"(timeout|connection)", str(e).lower()):
                         continue
                     break
@@ -592,8 +600,7 @@ class OSConnection(DocStoreConnection):
             elif isinstance(v, str) or isinstance(v, int):
                 bqry.filter.append(Q("term", **{k: v}))
             else:
-                raise Exception(
-                    f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
+                raise Exception(f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
         scripts = []
         params = {}
         for k, v in newValue.items():
@@ -623,11 +630,8 @@ class OSConnection(DocStoreConnection):
                 scripts.append(f"ctx._source.{k}=params.pp_{k};")
                 params[f"pp_{k}"] = json.dumps(v, ensure_ascii=False)
             else:
-                raise Exception(
-                    f"newValue `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str.")
-        ubq = UpdateByQuery(
-            index=indexName).using(
-            self.os).query(bqry)
+                raise Exception(f"newValue `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str.")
+        ubq = UpdateByQuery(index=indexName).using(self.os).query(bqry)
         ubq = ubq.script(source="".join(scripts), params=params)
         ubq = ubq.params(refresh=True)
         ubq = ubq.params(slices=5)
@@ -734,10 +738,7 @@ class OSConnection(DocStoreConnection):
         for _ in range(ATTEMPT_TIME):
             try:
                 # print(Search().query(qry).to_dict(), flush=True)
-                res = self.os.delete_by_query(
-                    index=indexName,
-                    body=Search().query(qry).to_dict(),
-                    refresh=True)
+                res = self.os.delete_by_query(index=indexName, body=Search().query(qry).to_dict(), refresh=True)
                 return res["deleted"]
             except Exception as e:
                 logger.warning("OSConnection.delete got exception: " + str(e))
@@ -820,8 +821,7 @@ class OSConnection(DocStoreConnection):
             txts = []
             for t in re.split(r"[.?!;\n]", txt):
                 for w in keywords:
-                    t = re.sub(r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])" % re.escape(w), r"\1<em>\2</em>\3", t,
-                               flags=re.IGNORECASE | re.MULTILINE)
+                    t = re.sub(r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])" % re.escape(w), r"\1<em>\2</em>\3", t, flags=re.IGNORECASE | re.MULTILINE)
                 if not re.search(r"<em>[^<>]+</em>", t, flags=re.IGNORECASE | re.MULTILINE):
                     continue
                 txts.append(t)
@@ -847,14 +847,8 @@ class OSConnection(DocStoreConnection):
         replaces = []
         for r in re.finditer(r" ([a-z_]+_l?tks)( like | ?= ?)'([^']+)'", sql):
             fld, v = r.group(1), r.group(3)
-            match = " MATCH({}, '{}', 'operator=OR;minimum_should_match=30%') ".format(
-                fld, rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(v)))
-            replaces.append(
-                ("{}{}'{}'".format(
-                    r.group(1),
-                    r.group(2),
-                    r.group(3)),
-                 match))
+            match = " MATCH({}, '{}', 'operator=OR;minimum_should_match=30%') ".format(fld, rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(v)))
+            replaces.append(("{}{}'{}'".format(r.group(1), r.group(2), r.group(3)), match))
 
         for p, r in replaces:
             sql = sql.replace(p, r, 1)
@@ -862,8 +856,7 @@ class OSConnection(DocStoreConnection):
 
         for i in range(ATTEMPT_TIME):
             try:
-                res = self.os.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format,
-                                        request_timeout="2s")
+                res = self.os.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, request_timeout="2s")
                 return res
             except ConnectionTimeout:
                 logger.exception("OSConnection.sql timeout")
diff --git a/rag/utils/redis_conn.py b/rag/utils/redis_conn.py
index 7099ece1c8..28075c125e 100644
--- a/rag/utils/redis_conn.py
+++ b/rag/utils/redis_conn.py
@@ -216,6 +216,14 @@ class RedisDB:
             self.__open__()
         return False
 
+    def set_if_absent(self, k, v, exp=3600):  # store v under k only when k does not exist yet (nx=True); returns True on success, False otherwise
+        try:
+            return bool(self.REDIS.set(k, v, exp, nx=True))  # exp is forwarded as the 3rd positional arg -- presumably the expiry in seconds (redis-py `ex`); confirm client type
+        except Exception as e:
+            logging.warning("RedisDB.set_if_absent " + str(k) + " got exception: " + str(e))
+            self.__open__()  # NOTE(review): looks like this re-establishes the connection after a failure -- confirm against __open__
+        return False  # reached only after an exception was logged above
+
     def sadd(self, key: str, member: str):
         try:
             self.REDIS.sadd(key, member)
diff --git a/test/testcases/restful_api/test_file_commit_routes_unit.py b/test/testcases/restful_api/test_file_commit_routes_unit.py
index 1dce6eaf24..0e9e72e987 100644
--- a/test/testcases/restful_api/test_file_commit_routes_unit.py
+++ b/test/testcases/restful_api/test_file_commit_routes_unit.py
@@ -120,7 +120,16 @@ class FileTestModel(BaseTestModel):
         db_table = "file"
 
 
-_TABLES = [FileCommitTestModel, FileCommitItemTestModel, FileTestModel]
+class UserTestModel(BaseTestModel):  # minimal test model for the "user" table; installed as db_models_mod.User in _load_module
+    id = CharField(max_length=32, primary_key=True)  # primary key, at most 32 characters
+    nickname = CharField(max_length=100, null=False, index=True)  # required and indexed
+    email = CharField(max_length=255, null=False)  # required
+
+    class Meta:
+        db_table = "user"  # explicit table name this model maps to
+
+
+_TABLES = [FileCommitTestModel, FileCommitItemTestModel, FileTestModel, UserTestModel]
 sqlite_db.create_tables(_TABLES)
 
 
@@ -241,6 +250,7 @@ def _load_module(monkeypatch):
     db_models_mod.FileCommit = FileCommitTestModel
     db_models_mod.FileCommitItem = FileCommitItemTestModel
     db_models_mod.File = FileTestModel
+    db_models_mod.User = UserTestModel
     db_models_mod.DataBaseModel = BaseTestModel
     monkeypatch.setitem(sys.modules, "api.db.db_models", db_models_mod)
 
diff --git a/test/unit_test/rag/graphrag/test_checkpoint_resume.py b/test/unit_test/rag/graphrag/test_checkpoint_resume.py
index 766bf86358..fc19fcf673 100644
--- a/test/unit_test/rag/graphrag/test_checkpoint_resume.py
+++ b/test/unit_test/rag/graphrag/test_checkpoint_resume.py
@@ -71,7 +71,7 @@
 #     "rag.utils.base64_image",
 
 #     "rag.prompts.generator",
-#     "rag.raptor",
+#     "rag.advanced_rag.knowlege_compile.raptor",
 #     "rag.app",
 #     "rag.graphrag.utils",
 # ]
diff --git a/test/unit_test/rag/svr/task_executor_refactor/test_chunk_post_processor.py b/test/unit_test/rag/svr/task_executor_refactor/test_chunk_post_processor.py
deleted file mode 100644
index c1394c4fae..0000000000
--- a/test/unit_test/rag/svr/task_executor_refactor/test_chunk_post_processor.py
+++ /dev/null
@@ -1,438 +0,0 @@
-#
-#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-"""
-Unit tests for ChunkPostProcessor module.
-
-Mock strategy: the LLM prompt functions (``keyword_extraction``, ``question_proposal``,
-``gen_metadata``, ``content_tagging``) are mocked since they make actual LLM API
-calls.  ``get_llm_cache`` / ``set_llm_cache`` run as real code, so cache
-population and retrieval are exercised.  ``rag_tokenizer`` is mocked because
-it requires NLTK data in the test environment.
-"""
-
-import pytest
-from unittest.mock import MagicMock, patch
-from rag.svr.task_executor_refactor.chunk_post_processor import (
-    extract_keywords,
-    generate_questions,
-    generate_metadata,
-    apply_tags,
-    count_with_key,
-    build_metadata_config,
-)
-from test.unit_test.rag.svr.task_executor_refactor.conftest import (
-    make_task_context,
-    MockChatModel,
-)
-
-
-class _BasePostProcessorTest:
-    """Shared helpers for post-processor test classes."""
-
-    @staticmethod
-    def _mock_llm_binding(chat_model_cls=MockChatModel):
-        """Patch model config lookup + LLMBundle to return a MockChatModel."""
-        p1 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_model_config_from_provider_instance",
-                   return_value=MagicMock())
-        p2 = patch("rag.svr.task_executor_refactor.chunk_post_processor.LLMBundle",
-                   return_value=chat_model_cls())
-        return p1, p2
-
-    @staticmethod
-    def _patch_prompt_func(func_path: str, return_value):
-        """Patch a prompt-level LLM function (the actual API call)."""
-        return patch(func_path, return_value=return_value)
-
-
-class TestExtractKeywords(_BasePostProcessorTest):
-    """Tests for extract_keywords function."""
-
-    @pytest.mark.asyncio
-    async def test_extract_keywords_success(self):
-        """Test successful keyword extraction — cache miss → LLM prompt runs."""
-        ctx = make_task_context(parser_config={"auto_keywords": 5})
-        docs = [
-            {"content_with_weight": "This is test content one"},
-            {"content_with_weight": "This is test content two"},
-        ]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value=None)  # cache miss
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")  # Redis stub
-        p5 = self._patch_prompt_func(
-            "rag.svr.task_executor_refactor.chunk_post_processor.keyword_extraction",
-            return_value="keyword1, keyword2",
-        )
-        p6 = patch("rag.svr.task_executor_refactor.chunk_post_processor.rag_tokenizer")
-        with p1, p2, p3, p4, p5, p6 as mock_tok:
-            mock_tok.tokenize.return_value = "keyword1 keyword2"
-            await extract_keywords(docs, ctx)
-            assert "important_kwd" in docs[0]
-            assert "important_tks" in docs[0]
-
-    @pytest.mark.asyncio
-    async def test_extract_keywords_canceled(self):
-        """Test keyword extraction when task is canceled."""
-        ctx = make_task_context(parser_config={"auto_keywords": 5},
-                                has_canceled_func=MagicMock(return_value=True))
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        with p1, p2:
-            await extract_keywords(docs, ctx)
-            assert "important_kwd" not in docs[0]
-
-    @pytest.mark.asyncio
-    async def test_extract_keywords_empty_docs(self):
-        """Test keyword extraction with empty docs list."""
-        ctx = make_task_context(parser_config={"auto_keywords": 5})
-        docs = []
-
-        p1, p2 = self._mock_llm_binding()
-        with p1, p2:
-            await extract_keywords(docs, ctx)
-            ctx.progress_cb.assert_called()
-
-
-class TestGenerateQuestions(_BasePostProcessorTest):
-    """Tests for generate_questions function."""
-
-    @pytest.mark.asyncio
-    async def test_generate_questions_success(self):
-        """Test successful question generation — cache miss → LLM prompt runs."""
-        ctx = make_task_context(parser_config={"auto_questions": 3})
-        docs = [{"content_with_weight": "This is test content one"}]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value=None)
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p5 = self._patch_prompt_func(
-            "rag.svr.task_executor_refactor.chunk_post_processor.question_proposal",
-            return_value="Question 1\nQuestion 2",
-        )
-        p6 = patch("rag.svr.task_executor_refactor.chunk_post_processor.rag_tokenizer")
-        with p1, p2, p3, p4, p5, p6 as mock_tok:
-            mock_tok.tokenize.return_value = "Question 1 Question 2"
-            await generate_questions(docs, ctx)
-            assert "question_kwd" in docs[0]
-            assert "question_tks" in docs[0]
-
-    @pytest.mark.asyncio
-    async def test_generate_questions_canceled(self):
-        """Test question generation when task is canceled."""
-        ctx = make_task_context(parser_config={"auto_questions": 3},
-                                has_canceled_func=MagicMock(return_value=True))
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        with p1, p2:
-            await generate_questions(docs, ctx)
-            assert "question_kwd" not in docs[0]
-
-
-class TestGenerateMetadata(_BasePostProcessorTest):
-    """Tests for generate_metadata function."""
-
-    @pytest.mark.asyncio
-    async def test_generate_metadata_success(self):
-        """Test successful metadata generation — cache miss → LLM prompt runs."""
-        ctx = make_task_context(
-            parser_config={
-                "enable_metadata": True,
-                "metadata": [{"key": "category", "type": "string"}],
-                "built_in_metadata": [{"key": "update_time", "type": "time"}],
-            },
-        )
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value=None)
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.DocMetadataService")
-        with p1, p2, p3, p4, p5 as mock_meta:
-            mock_meta.get_document_metadata.return_value = {}
-            mock_meta.update_document_metadata = MagicMock()
-            await generate_metadata(docs, ctx)
-            mock_meta.update_document_metadata.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_generate_metadata_with_write_interceptor(self):
-        """Test metadata generation with write interceptor."""
-        ctx = make_task_context(
-            parser_config={
-                "enable_metadata": True,
-                "metadata": [{"key": "category", "type": "string"}],
-                "built_in_metadata": [{"key": "update_time", "type": "time"}],
-            },
-            write_interceptor=MagicMock(),
-        )
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value=None)
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.DocMetadataService")
-        with p1, p2, p3, p4, p5:
-            await generate_metadata(docs, ctx)
-            ctx.write_interceptor.intercept.assert_called_once_with(
-                "DocMetadataService.update_document_metadata"
-            )
-
-    @pytest.mark.asyncio
-    async def test_generate_metadata_empty_config_does_not_crash(self):
-        """Empty parser_config — no metadata configured — should not crash."""
-        ctx = make_task_context(parser_config={})
-        docs = [{"content_with_weight": "test"}]
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.DocMetadataService")
-        with p1, p2, p3:
-            await generate_metadata(docs, ctx)  # no exception = pass
-
-    @pytest.mark.asyncio
-    async def test_generate_metadata_enum_none_accepted(self):
-        """enum: None in metadata — treated as absent, should not crash."""
-        ctx = make_task_context(
-            parser_config={
-                "enable_metadata": True,
-                "metadata": [{"key": "format", "type": "string", "enum": None}],
-            },
-        )
-        docs = [{"content_with_weight": "test"}]
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value=None)
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.DocMetadataService")
-        with p1, p2, p3, p4, p5:
-            await generate_metadata(docs, ctx)  # no exception = pass
-
-    @pytest.mark.asyncio
-    async def test_generate_metadata_description_none_accepted(self):
-        """description: None in metadata — should not crash."""
-        ctx = make_task_context(
-            parser_config={
-                "enable_metadata": True,
-                "metadata": [{"key": "test", "type": "string", "description": None}],
-            },
-        )
-        docs = [{"content_with_weight": "test"}]
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value=None)
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.DocMetadataService")
-        with p1, p2, p3, p4, p5:
-            await generate_metadata(docs, ctx)  # no exception = pass
-
-    @pytest.mark.asyncio
-    async def test_generate_metadata_built_in_with_enum_none(self):
-        """built_in_metadata with enum: None — should not crash."""
-        ctx = make_task_context(
-            parser_config={
-                "enable_metadata": True,
-                "built_in_metadata": [
-                    {"key": "update_time", "type": "time", "description": None, "enum": None},
-                ],
-            },
-        )
-        docs = [{"content_with_weight": "test"}]
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value=None)
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.DocMetadataService")
-        with p1, p2, p3, p4, p5:
-            await generate_metadata(docs, ctx)  # no exception = pass
-
-
-class TestApplyTags(_BasePostProcessorTest):
-    """Tests for apply_tags function."""
-
-    @pytest.mark.asyncio
-    async def test_apply_tags_success(self):
-        """Test successful tag application with tag cache miss."""
-        ctx = make_task_context(
-            kb_parser_config={"tag_kb_ids": ["kb_1"], "topn_tags": 3},
-        )
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.settings")
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value='{"tag1": 1}')  # cache hit → skip LLM
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p6 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_tags_from_cache",
-                   return_value=None)
-        p7 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_tags_to_cache")
-        with p1, p2, p3 as mock_settings, p4, p5, p6 as mock_get_tags, p7 as mock_set_tags:
-            mock_settings.retriever.all_tags_in_portion.return_value = {"tag1": 10, "tag2": 5}
-            mock_settings.retriever.tag_content.return_value = True
-            await apply_tags(docs, ctx)
-            assert len(docs) == 1
-            mock_get_tags.assert_called_once()
-            mock_set_tags.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_apply_tags_canceled(self):
-        """Test tag application when task is canceled."""
-        ctx = make_task_context(
-            kb_parser_config={"tag_kb_ids": ["kb_1"], "topn_tags": 3},
-            has_canceled_func=MagicMock(return_value=True),
-        )
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.settings")
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_tags_from_cache",
-                   return_value=None)
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_tags_to_cache")
-        with p1, p2, p3 as mock_settings, p4, p5:
-            mock_settings.retriever.all_tags_in_portion.return_value = {"tag1": 10}
-            await apply_tags(docs, ctx)
-
-    @pytest.mark.asyncio
-    async def test_apply_tags_tag_cache_miss(self):
-        """Test apply_tags when get_tags_from_cache returns None (cache miss)."""
-        ctx = make_task_context(
-            kb_parser_config={"tag_kb_ids": ["kb_1"], "topn_tags": 3},
-        )
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.settings")
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value='{"tag1": 1}')  # cache hit → skip LLM
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p6 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_tags_from_cache",
-                   return_value=None)
-        p7 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_tags_to_cache")
-        with p1, p2, p3 as mock_settings, p4, p5, p6 as mock_get_tags, p7 as mock_set_tags:
-            mock_settings.retriever.all_tags_in_portion.return_value = {"tag1": 10, "tag2": 5}
-            mock_settings.retriever.tag_content.return_value = True
-            await apply_tags(docs, ctx)
-            mock_get_tags.assert_called_once_with(["kb_1"])
-            mock_set_tags.assert_called_once()
-            mock_settings.retriever.all_tags_in_portion.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_apply_tags_tag_cache_hit(self):
-        """Test apply_tags when get_tags_from_cache returns valid data (cache hit)."""
-        ctx = make_task_context(
-            kb_parser_config={"tag_kb_ids": ["kb_1"], "topn_tags": 3},
-        )
-        docs = [{"content_with_weight": "This is test content"}]
-
-        p1, p2 = self._mock_llm_binding()
-        p3 = patch("rag.svr.task_executor_refactor.chunk_post_processor.settings")
-        p4 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_llm_cache",
-                   return_value='{"tag1": 1}')  # cache hit → skip LLM
-        p5 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_llm_cache")
-        p6 = patch("rag.svr.task_executor_refactor.chunk_post_processor.get_tags_from_cache",
-                   return_value='{"cached_tag": 10}')
-        p7 = patch("rag.svr.task_executor_refactor.chunk_post_processor.set_tags_to_cache")
-        with p1, p2, p3 as mock_settings, p4, p5, p6 as mock_get_tags, p7 as mock_set_tags:
-            mock_settings.retriever.tag_content.return_value = True
-            await apply_tags(docs, ctx)
-            mock_get_tags.assert_called_once_with(["kb_1"])
-            mock_set_tags.assert_not_called()
-            mock_settings.retriever.all_tags_in_portion.assert_not_called()
-
-
-class TestCountWithKey:
-    """Tests for count_with_key function."""
-
-    def test_count_with_key_all_have_key(self):
-        docs = [{"tag": 1}, {"tag": 2}, {"tag": 3}]
-        assert count_with_key(docs, "tag") == 3
-
-    def test_count_with_key_some_have_key(self):
-        docs = [{"tag": 1}, {"other": 2}, {"tag": 3}]
-        assert count_with_key(docs, "tag") == 2
-
-    def test_count_with_key_none_have_key(self):
-        docs = [{"other": 1}, {"other": 2}]
-        assert count_with_key(docs, "tag") == 0
-
-    def test_count_with_key_empty_docs(self):
-        assert count_with_key([], "tag") == 0
-
-    def test_count_with_key_falsy_value(self):
-        docs = [{"tag": 0}, {"tag": ""}, {"tag": None}]
-        assert count_with_key(docs, "tag") == 0
-
-    def test_count_with_key_truthy_value(self):
-        docs = [{"tag": 1}, {"tag": "value"}, {"tag": [1, 2]}]
-        assert count_with_key(docs, "tag") == 3
-
-
-class TestBuildMetadataConfig:
-    """Tests for build_metadata_config function."""
-
-    def test_dict_without_properties_returns_schema(self):
-        parser_config = {"metadata": {"type": "object"}, "built_in_metadata": []}
-        assert build_metadata_config(parser_config) == {"type": "object", "properties": {}}
-
-    def test_dict_with_properties_and_built_in(self):
-        parser_config = {
-            "metadata": {"type": "object", "properties": {"a": {"type": "string"}}},
-            "built_in_metadata": [{"key": "author", "description": "Author name", "enum": ["alice", "bob"]}],
-        }
-        result = build_metadata_config(parser_config)
-        assert "a" in result["properties"]
-        assert "author" in result["properties"]
-
-    def test_dict_with_properties_no_built_in(self):
-        parser_config = {
-            "metadata": {"type": "object", "properties": {"a": {"type": "string"}}},
-            "built_in_metadata": [],
-        }
-        result = build_metadata_config(parser_config)
-        assert result == {"type": "object", "properties": {"a": {"type": "string"}}}
-
-    def test_list_with_built_in(self):
-        parser_config = {
-            "metadata": [{"key": "category"}],
-            "built_in_metadata": [{"key": "author"}],
-        }
-        assert build_metadata_config(parser_config) == [{"key": "category"}, {"key": "author"}]
-
-    def test_list_without_built_in(self):
-        parser_config = {"metadata": [{"key": "category"}], "built_in_metadata": []}
-        assert build_metadata_config(parser_config) == [{"key": "category"}]
-
-    def test_other_type_with_built_in(self):
-        parser_config = {"metadata": [], "built_in_metadata": [{"key": "author"}]}
-        assert build_metadata_config(parser_config) == [{"key": "author"}]
-
-    def test_idempotent_same_input(self):
-        parser_config = {
-            "metadata": [{"key": "category"}],
-            "built_in_metadata": [{"key": "author"}],
-        }
-        assert build_metadata_config(parser_config) == build_metadata_config(parser_config)
-
-    def test_missing_metadata_key(self):
-        assert build_metadata_config({"built_in_metadata": []}) == []
-
-    def test_metadata_is_none(self):
-        """When metadata is None, built_in_metadata alone is returned."""
-        parser_config = {"metadata": None, "built_in_metadata": [{"key": "author"}]}
-        result = build_metadata_config(parser_config)
-        assert result == [{"key": "author"}]
diff --git a/test/unit_test/rag/svr/task_executor_refactor/test_chunk_service.py b/test/unit_test/rag/svr/task_executor_refactor/test_chunk_service.py
deleted file mode 100644
index b8ce477dcb..0000000000
--- a/test/unit_test/rag/svr/task_executor_refactor/test_chunk_service.py
+++ /dev/null
@@ -1,318 +0,0 @@
-#
-#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-"""
-Unit tests for ChunkService module.
-
-Note: After refactoring, some functionality has been moved to:
-- chunk_builder.py: Parser factory, run_chunking, extract_outline
-- chunk_post_processor.py: Keyword extraction, question generation, metadata, tagging
-
-This test file now focuses on ChunkService-specific functionality:
-- build_chunks orchestration
-- _prepare_docs_and_upload
-- insert_chunks and related methods
-"""
-
-import pytest
-from unittest.mock import MagicMock, patch, AsyncMock
-from rag.svr.task_executor_refactor.chunk_service import ChunkService
-from test.unit_test.rag.svr.task_executor_refactor.conftest import make_task_context
-
-
-class TestChunkServiceInit:
-    """Tests for ChunkService initialization."""
-
-    def test_init_stores_task_context(self):
-        """Test that task context is stored."""
-        ctx = MagicMock()
-        service = ChunkService(ctx=ctx)
-        assert service._task_context is ctx
-
-
-class TestChunkServiceBuildChunks:
-    """Tests for build_chunks method."""
-
-
-    @pytest.mark.asyncio
-    async def test_build_chunks_file_size_exceeded(self):
-        """Test build_chunks returns empty list when file size exceeds limit."""
-        ctx = make_task_context(size=1000000000)  # Very large size
-
-        service = ChunkService(ctx=ctx)
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings:
-            mock_settings.DOC_MAXIMUM_SIZE = 1000  # Small limit
-
-            mock_rec_ctx = MagicMock()
-            ctx.recording_context = mock_rec_ctx
-
-            result = await service.build_chunks(b"test binary")
-
-            assert result == []
-            mock_rec_ctx.record.assert_any_call("file_size_exceeded", True)
-
-    @pytest.mark.asyncio
-    async def test_build_chunks_file_size_ok(self):
-        """Test build_chunks proceeds when file size is within limit."""
-        ctx = make_task_context(size=1000)
-
-        service = ChunkService(ctx=ctx)
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings:
-            mock_settings.DOC_MAXIMUM_SIZE = 10000000  # Large limit
-
-            mock_rec_ctx = MagicMock()
-            ctx.recording_context = mock_rec_ctx
-
-            with patch("rag.svr.task_executor_refactor.chunk_service.get_parser") as mock_get_parser:
-                mock_parser = MagicMock()
-                mock_get_parser.return_value = mock_parser
-
-                with patch("rag.svr.task_executor_refactor.chunk_service.run_chunking", new_callable=AsyncMock) as mock_run_chunking:
-                    mock_run_chunking.return_value = [{"content_with_weight": "test"}]
-
-                    with patch("rag.svr.task_executor_refactor.chunk_service.extract_outline", new_callable=AsyncMock):
-                        with patch.object(service, '_prepare_docs_and_upload', new_callable=AsyncMock) as mock_prepare:
-                            mock_prepare.return_value = [{"id": "chunk_1", "content_with_weight": "test"}]
-
-                            await service.build_chunks(b"test binary")
-
-                            mock_rec_ctx.record.assert_any_call("file_size_exceeded", False)
-                            mock_rec_ctx.record.assert_any_call("parser_id", "naive")
-                            mock_get_parser.assert_called_once_with("naive")
-
-    @pytest.mark.asyncio
-    @pytest.mark.parametrize("task_kwargs,func_path,func_name", [
-        ({"parser_config": {"auto_keywords": 5}}, "extract_keywords", "extract_keywords"),
-        ({"parser_config": {"auto_questions": 3}}, "generate_questions", "generate_questions"),
-        ({"kb_parser_config": {"tag_kb_ids": ["kb_1"]}}, "apply_tags", "apply_tags"),
-        ({"parser_config": {"enable_metadata": True, "metadata": [{"name": "category", "type": "string"}]}},
-         "generate_metadata", "generate_metadata"),
-    ])
-    async def test_build_chunks_with_post_processing(self, task_kwargs, func_path, func_name):
-        """Test build_chunks triggers post-processing when configured."""
-        ctx = make_task_context(**task_kwargs)
-        service = ChunkService(ctx=ctx)
-
-        mock_rec_ctx = MagicMock()
-        ctx.recording_context = mock_rec_ctx
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings, \
-             patch("rag.svr.task_executor_refactor.chunk_service.get_parser") as mock_get_parser, \
-             patch("rag.svr.task_executor_refactor.chunk_service.run_chunking", new_callable=AsyncMock) as mock_run_chunking, \
-             patch("rag.svr.task_executor_refactor.chunk_service.extract_outline", new_callable=AsyncMock), \
-             patch.object(service, '_prepare_docs_and_upload', new_callable=AsyncMock) as mock_prepare, \
-             patch(f"rag.svr.task_executor_refactor.chunk_service.{func_path}", new_callable=AsyncMock) as mock_fn:
-            mock_settings.DOC_MAXIMUM_SIZE = 10000000
-            mock_get_parser.return_value = MagicMock()
-            mock_run_chunking.return_value = []
-            mock_prepare.return_value = [{"id": "chunk_1", "content_with_weight": "test"}]
-            await service.build_chunks(b"test binary")
-            mock_fn.assert_called_once()
-
-
-class TestChunkServicePrepareDocsAndUpload:
-    """Tests for _prepare_docs_and_upload method."""
-
-
-    @pytest.mark.asyncio
-    async def test_prepare_docs_and_upload_basic(self):
-        """Test basic document preparation."""
-        ctx = make_task_context()
-        service = ChunkService(ctx=ctx)
-
-        cks = [{"content_with_weight": "test chunk"}]
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings:
-            mock_settings.STORAGE_IMPL = MagicMock()
-            mock_settings.STORAGE_IMPL.put = MagicMock()
-
-            with patch("rag.svr.task_executor_refactor.chunk_service.image2id", new_callable=AsyncMock):
-
-                docs = await service._prepare_docs_and_upload(cks)
-
-                assert len(docs) == 1
-                assert docs[0]["doc_id"] == "doc_1"
-                assert docs[0]["kb_id"] == "kb_1"
-
-    @pytest.mark.asyncio
-    async def test_prepare_docs_and_upload_with_pagerank(self):
-        """Test document preparation with pagerank."""
-        ctx = make_task_context()
-        ctx.pagerank = 5
-        service = ChunkService(ctx=ctx)
-
-        cks = [{"content_with_weight": "test chunk"}]
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings:
-            mock_settings.STORAGE_IMPL = MagicMock()
-
-            with patch("rag.svr.task_executor_refactor.chunk_service.image2id", new_callable=AsyncMock):
-
-                docs = await service._prepare_docs_and_upload(cks)
-
-                assert docs[0].get("pagerank_fea") == 5
-
-
-class TestChunkServiceInsertChunks:
-    """Tests for insert_chunks method."""
-
-
-    @pytest.mark.asyncio
-    async def test_insert_chunks_success(self):
-        """Test successful chunk insertion."""
-        ctx = make_task_context()
-        service = ChunkService(ctx=ctx)
-
-        chunks = [
-            {"id": "chunk_1", "content_with_weight": "test1"},
-            {"id": "chunk_2", "content_with_weight": "test2"},
-        ]
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings:
-            mock_settings.DOC_BULK_SIZE = 100
-            mock_settings.docStoreConn = MagicMock()
-            mock_settings.docStoreConn.insert = MagicMock(return_value=None)
-
-            with patch("rag.svr.task_executor_refactor.chunk_service.search.index_name") as mock_index:
-                mock_index.return_value = "test_index"
-
-                with patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_thread:
-                    mock_thread.return_value = None
-
-                    with patch("rag.svr.task_executor_refactor.chunk_service.TaskService") as mock_task:
-                        mock_task.update_chunk_ids = MagicMock()
-
-                        result = await service.insert_chunks("task_1", "tenant_1", "kb_1", chunks)
-
-                        assert result is True
-
-    @pytest.mark.asyncio
-    async def test_insert_chunks_canceled(self):
-        """Test chunk insertion when task is canceled."""
-        ctx = make_task_context()
-        ctx.has_canceled_func = MagicMock(return_value=True)
-        service = ChunkService(ctx=ctx)
-
-        chunks = [{"id": "chunk_1", "content_with_weight": "test1"}]
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings:
-            mock_settings.DOC_BULK_SIZE = 100
-            mock_settings.docStoreConn = MagicMock()
-            mock_settings.docStoreConn.insert = MagicMock(return_value=None)
-
-            with patch("rag.svr.task_executor_refactor.chunk_service.search.index_name") as mock_index:
-                mock_index.return_value = "test_index"
-
-                with patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_thread:
-                    mock_thread.return_value = None
-
-                    result = await service.insert_chunks("task_1", "tenant_1", "kb_1", chunks)
-
-                    assert result is False
-                    ctx.progress_cb.assert_called_with(-1, msg="Task has been canceled.")
-
-    @pytest.mark.asyncio
-    async def test_insert_chunks_doc_store_error(self):
-        """Test chunk insertion when doc store returns error."""
-        ctx = make_task_context()
-        service = ChunkService(ctx=ctx)
-
-        chunks = [{"id": "chunk_1", "content_with_weight": "test1"}]
-
-        with patch("rag.svr.task_executor_refactor.chunk_service.settings") as mock_settings:
-            mock_settings.DOC_BULK_SIZE = 100
-            mock_settings.docStoreConn = MagicMock()
-            mock_settings.docStoreConn.insert = MagicMock(return_value="Error message")
-
-            with patch("rag.svr.task_executor_refactor.chunk_service.search.index_name") as mock_index:
-                mock_index.return_value = "test_index"
-
-                with patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_thread:
-                    mock_thread.return_value = "Error"
-
-                    with pytest.raises(Exception, match="Insert chunk error"):
-                        await service.insert_chunks("task_1", "tenant_1", "kb_1", chunks)
-
-
-class TestChunkServiceCreateMotherChunks:
-    """Tests for _create_mother_chunks class method."""
-
-    def test_create_mother_chunks_with_mom_field(self):
-        """Test creating mother chunks from mom field."""
-        chunks = [
-            {"id": "chunk_1", "mom": "Summary text 1", "content_with_weight": "test1"},
-        ]
-
-        mothers = ChunkService._create_mother_chunks(chunks)
-
-        assert len(mothers) == 1
-        assert mothers[0]["content_with_weight"] == "Summary text 1"
-        assert mothers[0]["available_int"] == 0
-
-    def test_create_mother_chunks_with_mom_with_weight_field(self):
-        """Test creating mother chunks from mom_with_weight field."""
-        chunks = [
-            {"id": "chunk_1", "mom_with_weight": "Summary text 2", "content_with_weight": "test1"},
-        ]
-
-        mothers = ChunkService._create_mother_chunks(chunks)
-
-        assert len(mothers) == 1
-        assert mothers[0]["content_with_weight"] == "Summary text 2"
-
-    def test_create_mother_chunks_no_mom_field(self):
-        """Test creating mother chunks when no mom field present."""
-        chunks = [
-            {"id": "chunk_1", "content_with_weight": "test1"},
-        ]
-
-        mothers = ChunkService._create_mother_chunks(chunks)
-
-        assert len(mothers) == 0
-
-    def test_create_mother_chunks_empty_mom(self):
-        """Test creating mother chunks with empty mom field."""
-        chunks = [
-            {"id": "chunk_1", "mom": "", "content_with_weight": "test1"},
-        ]
-
-        mothers = ChunkService._create_mother_chunks(chunks)
-
-        assert len(mothers) == 0
-
-    def test_create_mother_chunks_deduplicates_ids(self):
-        """Test that mother chunks deduplicate by ID."""
-        chunks = [
-            {"id": "chunk_1", "mom": "Same summary", "content_with_weight": "test1"},
-            {"id": "chunk_2", "mom": "Same summary", "content_with_weight": "test2"},
-        ]
-
-        mothers = ChunkService._create_mother_chunks(chunks)
-
-        assert len(mothers) == 1
-
-    def test_create_mother_chunks_filters_fields(self):
-        """Test that mother chunks only keep allowed fields."""
-        chunks = [
-            {"id": "chunk_1", "mom": "Summary", "extra_field": "should be removed", "content_with_weight": "test1"},
-        ]
-
-        mothers = ChunkService._create_mother_chunks(chunks)
-
-        assert "extra_field" not in mothers[0]
-        assert "id" in mothers[0]
-        assert "content_with_weight" in mothers[0]
diff --git a/test/unit_test/rag/svr/task_executor_refactor/test_task_handler.py b/test/unit_test/rag/svr/task_executor_refactor/test_task_handler.py
deleted file mode 100644
index 23997c3bff..0000000000
--- a/test/unit_test/rag/svr/task_executor_refactor/test_task_handler.py
+++ /dev/null
@@ -1,316 +0,0 @@
-#
-#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-"""
-Unit tests for TaskHandler module.
-
-Mock strategy: external boundaries (LLMBundle, model config services, settings)
-are mocked so that ``handle()`` and ``_bind_embedding_model`` execute their
-real logic.  Heavy orchestration methods (``_run_standard_chunking``,
-``_run_raptor``, ``_run_graphrag``) are mocked since they are tested
-exhaustively in the integration test suite.
-
-Stable pure helpers (_build_toc) are tested directly.
-"""
-
-import pytest
-from unittest.mock import MagicMock, AsyncMock, patch
-
-from rag.svr.task_executor_refactor.task_handler import TaskHandler
-
-# Reuse shared helpers from conftest
-from test.unit_test.rag.svr.task_executor_refactor.conftest import (
-    patch_embedding_binding,
-    create_mock_settings,
-    make_task_context,
-)
-
-
-class TestTaskHandlerHandleTask:
-    """Tests for the public handle_task() entry point."""
-
-    @pytest.mark.asyncio
-    async def test_handle_task_calls_handle(self):
-        """Test handle_task delegates to handle()."""
-        ctx = MagicMock()
-        ctx.id = "task_1"
-        ctx.tenant_id = "tenant_1"
-        ctx.kb_id = "kb_1"
-        ctx.doc_id = "doc_1"
-        ctx.has_canceled_func = MagicMock(return_value=False)
-        handler = TaskHandler(ctx=ctx)
-        handler.handle = AsyncMock()
-        await handler.handle_task()
-        handler.handle.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_handle_task_cleanup_on_cancel(self):
-        """Test handle_task cleans up docStore when canceled."""
-        from common import settings
-        mock_doc_store = MagicMock()
-        mock_doc_store.index_exist = MagicMock(return_value=True)
-        mock_doc_store.delete = MagicMock(return_value=None)
-        orig = settings.docStoreConn
-        settings.docStoreConn = mock_doc_store
-        try:
-            ctx = MagicMock()
-            ctx.id = "task_1"
-            ctx.tenant_id = "tenant_1"
-            ctx.kb_id = "kb_1"
-            ctx.doc_id = "doc_1"
-            ctx.has_canceled_func = MagicMock(return_value=True)
-            ctx.recording_context = MagicMock()
-            handler = TaskHandler(ctx=ctx)
-            handler.handle = AsyncMock(side_effect=Exception("test error"))
-            with pytest.raises(Exception, match="test error"):
-                await handler.handle_task()
-            mock_doc_store.delete.assert_called()
-        finally:
-            settings.docStoreConn = orig
-
-    @pytest.mark.asyncio
-    async def test_handle_task_cleanup_skips_when_index_missing(self):
-        """Cancel cleanup should not call delete when the index doesn't exist."""
-        from common import settings
-        mock_doc_store = MagicMock()
-        mock_doc_store.index_exist = MagicMock(return_value=False)
-        mock_doc_store.delete = MagicMock()
-        orig = settings.docStoreConn
-        settings.docStoreConn = mock_doc_store
-        try:
-            ctx = MagicMock()
-            ctx.id = "task_1"
-            ctx.tenant_id = "tenant_1"
-            ctx.kb_id = "kb_1"
-            ctx.doc_id = "doc_1"
-            ctx.has_canceled_func = MagicMock(return_value=True)
-            ctx.recording_context = MagicMock()
-            handler = TaskHandler(ctx=ctx)
-            handler.handle = AsyncMock(side_effect=Exception("test error"))
-            with pytest.raises(Exception, match="test error"):
-                await handler.handle_task()
-            mock_doc_store.delete.assert_not_called()
-        finally:
-            settings.docStoreConn = orig
-
-
-class TestTaskHandlerHandle:
-    """Tests for the public handle() method.
-
-    External boundaries (LLMBundle, model config services, settings) are mocked
-    so that ``_bind_embedding_model`` and ``_init_kb`` execute their real logic
-    through ``handle()``.  Only the heavy orchestration methods
-    (``_run_standard_chunking``, ``_run_raptor``, ``_run_graphrag``) are mocked.
-    """
-
-    # ── Context factory: make_task_context from conftest — see import above
-
-    @pytest.mark.asyncio
-    async def test_handle_memory_task(self):
-        """Test handle returns after dispatching memory task — no further processing."""
-        ctx = make_task_context(task_type="memory")
-        ctx.raw_task = {"memory_id": "mem_1", "id": "task_1"}
-
-        with patch("rag.svr.task_executor_refactor.task_handler.handle_save_to_memory_task",
-                   new_callable=AsyncMock) as mock_handle:
-
-            handler = TaskHandler(ctx=ctx)
-            handler._run_standard_chunking = AsyncMock()
-            handler._run_dataflow = AsyncMock()
-            await handler.handle()
-
-            mock_handle.assert_called_once_with(ctx.raw_task)
-            # After memory task, should return immediately — no further routing
-            handler._run_standard_chunking.assert_not_called()
-            handler._run_dataflow.assert_not_called()
-
-    @pytest.mark.asyncio
-    async def test_handle_dataflow_task(self):
-        """Test handle dispatches dataflow tasks (after embedding binding + init_kb)."""
-        ctx = make_task_context(task_type="dataflow", doc_id="doc_1")
-
-        with patch_embedding_binding(), \
-             patch("rag.svr.task_executor_refactor.task_handler.settings", create_mock_settings()), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_idx"):
-
-            handler = TaskHandler(ctx=ctx)
-            handler._run_dataflow = AsyncMock()
-            await handler.handle()
-            handler._run_dataflow.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_handle_canceled_task(self):
-        """Test handle returns early when task is canceled."""
-        ctx = make_task_context(has_canceled_func=MagicMock(return_value=True))
-
-        handler = TaskHandler(ctx=ctx)
-        await handler.handle()
-        ctx.progress_cb.assert_called_once_with(-1, msg="Task has been canceled.")
-
-    @pytest.mark.asyncio
-    async def test_handle_standard_chunking(self):
-        """Test handle routes to standard chunking.
-
-        ``_bind_embedding_model`` and ``_init_kb`` run their real code;
-        only the external boundary (LLM API, settings) is mocked.
-        """
-        ctx = make_task_context()
-
-        with patch_embedding_binding(), \
-             patch("rag.svr.task_executor_refactor.task_handler.settings", create_mock_settings()), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_idx"):
-
-            handler = TaskHandler(ctx=ctx)
-            handler._run_standard_chunking = AsyncMock()
-            await handler.handle()
-            handler._run_standard_chunking.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_handle_raptor_task(self):
-        """Test handle routes to RAPTOR with real embedding binding."""
-        ctx = make_task_context(task_type="raptor")
-
-        with patch_embedding_binding(), \
-             patch("rag.svr.task_executor_refactor.task_handler.settings", create_mock_settings()), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_idx"):
-
-            handler = TaskHandler(ctx=ctx)
-            handler._run_raptor = AsyncMock()
-            await handler.handle()
-            handler._run_raptor.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_handle_graphrag_task(self):
-        """Test handle routes to GraphRAG with real embedding binding."""
-        ctx = make_task_context(task_type="graphrag")
-
-        with patch_embedding_binding(), \
-             patch("rag.svr.task_executor_refactor.task_handler.settings", create_mock_settings()), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_idx"):
-
-            handler = TaskHandler(ctx=ctx)
-            handler._run_graphrag = AsyncMock()
-            await handler.handle()
-            handler._run_graphrag.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_handle_embedding_model_failure(self):
-        """Test handle returns early when embedding model binding fails.
-
-        ``LLMBundle`` is patched to raise, so ``_bind_embedding_model``
-        itself raises — no need to mock the private method.
-        """
-        ctx = make_task_context()
-
-        with patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_cfg, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle:
-
-            mock_cfg.return_value = MagicMock()
-            mock_default.return_value = MagicMock()
-            mock_bundle.side_effect = RuntimeError("embedding service unavailable")
-
-            handler = TaskHandler(ctx=ctx)
-            with pytest.raises(RuntimeError, match="embedding service unavailable"):
-                await handler.handle()
-
-    @pytest.mark.asyncio
-    async def test_handle_storage_binary_none_raises_file_not_found(self):
-        """Verify that None binary from storage raises FileNotFoundError."""
-        ctx = make_task_context()
-
-        with patch_embedding_binding(), \
-             patch("rag.svr.task_executor_refactor.task_handler.settings", create_mock_settings()), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_idx"), \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService.get_storage_address",
-                   return_value=("bucket_test", "name_test")), \
-             patch.object(TaskHandler, "_get_storage_binary", new_callable=AsyncMock, return_value=None):
-
-            handler = TaskHandler(ctx=ctx)
-            # Do NOT mock _run_standard_chunking — we want real code path for the check
-            with pytest.raises(FileNotFoundError, match="Can not find file <test.pdf> from minio"):
-                await handler.handle()
-
-
-class TestTaskHandlerBuildToc:
-    """Tests for _build_toc — stable pure helper (requires LLM mocking)."""
-
-    def test_build_toc_with_empty_docs(self):
-        """Test _build_toc returns None when run_toc_from_text returns empty."""
-        ctx = MagicMock()
-        ctx.tenant_id = "tenant_1"
-        ctx.llm_id = "llm_1"
-        ctx.language = "en"
-
-        docs = [{"id": "chunk_1", "content_with_weight": "text", "page_num_int": [1], "top_int": [0]}]
-
-        def mock_asyncio_run(coro):
-            coro.close()
-            return []
-
-        with patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_cfg, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.asyncio.run", side_effect=mock_asyncio_run):
-
-            mock_cfg.return_value = MagicMock()
-            mock_msg = MagicMock()
-            mock_bundle.return_value.__enter__.return_value = mock_msg
-
-            result = TaskHandler._build_toc(ctx, docs, MagicMock())
-            assert result is None
-
-    def test_build_toc_with_results(self):
-        """Test _build_toc builds TOC chunk when results exist."""
-        ctx = MagicMock()
-        ctx.tenant_id = "tenant_1"
-        ctx.llm_id = "llm_1"
-        ctx.language = "en"
-
-        docs = [{"id": "chunk_0", "content_with_weight": "text", "doc_id": "doc_1", "page_num_int": [1], "top_int": [0]}]
-        toc_result = [{"chunk_id": "0", "title": "Section 1"}]
-
-        def mock_asyncio_run(coro):
-            coro.close()
-            return toc_result
-
-        with patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_cfg, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.asyncio.run", side_effect=mock_asyncio_run):
-
-            mock_cfg.return_value = MagicMock()
-            mock_msg = MagicMock()
-            mock_bundle.return_value.__enter__.return_value = mock_msg
-
-            result = TaskHandler._build_toc(ctx, docs, MagicMock())
-            assert result is not None
-            assert "toc_kwd" in result
-            assert result["toc_kwd"] == "toc"
-            assert result["available_int"] == 0
-
-
-class TestTaskHandlerInit:
-    """Tests for TaskHandler initialization."""
-
-    def test_init_stores_context_and_hook(self):
-        ctx = MagicMock()
-        hook = MagicMock()
-        handler = TaskHandler(ctx=ctx, billing_hook=hook)
-        assert handler._task_context is ctx
-        assert handler._billing_hook is hook
-
-    def test_init_default_hook_none(self):
-        ctx = MagicMock()
-        handler = TaskHandler(ctx=ctx)
-        assert handler._billing_hook is None
diff --git a/test/unit_test/rag/svr/task_executor_refactor/test_task_handler_integration.py b/test/unit_test/rag/svr/task_executor_refactor/test_task_handler_integration.py
deleted file mode 100644
index 9c710e6ec2..0000000000
--- a/test/unit_test/rag/svr/task_executor_refactor/test_task_handler_integration.py
+++ /dev/null
@@ -1,856 +0,0 @@
-#
-#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-"""
-Integration tests for TaskHandler orchestration.
-"""
-
-import asyncio
-import gc
-from typing import Any, Dict
-from unittest.mock import MagicMock, AsyncMock, patch
-
-import pytest
-
-from rag.svr.task_executor_refactor.task_handler import TaskHandler
-from rag.svr.task_executor_refactor.task_context import TaskContext, TaskLimiters, TaskCallbacks
-from rag.svr.task_executor_refactor.recording_context import BaseRecordingContext, RecordingContext
-from rag.svr.task_executor_refactor.constants import CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID
-
-# Import shared helpers from conftest
-from test.unit_test.rag.svr.task_executor_refactor.conftest import (
-    AsyncMockLimiter,
-    create_mock_embedding_model,
-    create_default_chunks,
-    create_mock_settings,
-    create_mock_chunk_service,
-    make_task_dict,
-    patch_get_storage_binary,
-    patch_task_handler_settings,
-    mock_thread_return_binary,
-    mock_thread_return_none,
-)
-
-
-def create_task_context(
-    task_dict: Dict[str, Any],
-    is_canceled: bool = False,
-    recording_context: BaseRecordingContext | None = None,
-) -> TaskContext:
-    """Create a real TaskContext with mocked limiters and callbacks.
-
-    Args:
-        task_dict: Task dictionary with all task attributes.
-        is_canceled: If True, has_canceled_func returns True.
-        recording_context: RecordingContext to inject. If None, a new one
-            is created automatically so that recording_context access works.
-
-    Returns:
-        TaskContext with all required dependencies injected.
-    """
-    if recording_context is None:
-        recording_context = RecordingContext()
-    limiter = AsyncMockLimiter()
-    progress_callback = MagicMock()
-    ctx = TaskContext(
-        task=task_dict,
-        limiters=TaskLimiters(
-            chat=limiter,
-            minio=limiter,
-            chunk=limiter,
-            embed=limiter,
-            kg=limiter,
-        ),
-        callbacks=TaskCallbacks(
-            progress=progress_callback,
-            has_canceled=MagicMock(return_value=is_canceled),
-        ),
-        recording_context=recording_context,
-    )
-    # Add progress_callback property for task_handler compatibility
-    ctx.progress_callback = progress_callback
-    # Add set_progress_cb method for task_handler compatibility
-    ctx.set_progress_cb = lambda cb: setattr(ctx.callbacks, 'progress_cb', cb)
-    return ctx
-
-
-class TestStandardChunkingPipelineIntegration:
-    """P0: Integration tests for the complete standard chunking pipeline."""
-
-    @pytest.mark.asyncio
-    async def test_full_chunking_pipeline_records_task_status(self):
-        """Verify that the complete pipeline records task_status as 'completed'."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            recording_ctx = ctx.recording_context
-            task_status = recording_ctx.get("task_status")
-            assert task_status == "completed", f"Expected task_status='completed', got {task_status}"
-
-    @pytest.mark.asyncio
-    async def test_full_chunking_pipeline_records_insertion_result(self):
-        """Verify that insertion_result is recorded as 'success'."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            recording_ctx = ctx.recording_context
-            insertion_result = recording_ctx.get("insertion_result")
-            assert insertion_result == "success", f"Expected insertion_result='success', got {insertion_result}"
-
-    @pytest.mark.asyncio
-    async def test_full_chunking_pipeline_records_chunk_ids(self):
-        """Verify that chunk_ids_count is recorded after build_chunks."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunks = create_default_chunks(count=3)
-        mock_chunk_service = create_mock_chunk_service(chunks=mock_chunks)
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.run_toc_from_text", new_callable=AsyncMock) as mock_run_toc, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-            mock_run_toc.return_value = []  # TOC returns empty when not enabled
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            recording_ctx = ctx.recording_context
-            chunk_ids_count = recording_ctx.get("chunk_ids_count")
-            assert chunk_ids_count is not None, "chunk_ids_count should be recorded"
-            assert chunk_ids_count == 3, f"Expected chunk_ids_count=3, got {chunk_ids_count}"
-
-    @pytest.mark.asyncio
-    async def test_full_chunking_pipeline_records_token_count(self):
-        """Verify that token_count and vector_size are recorded after embedding."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            recording_ctx = ctx.recording_context
-            token_count = recording_ctx.get("token_count")
-            vector_size = recording_ctx.get("vector_size")
-
-            assert token_count is not None, "token_count should be recorded"
-            assert vector_size is not None, "vector_size should be recorded"
-            assert vector_size == 128, f"Expected vector_size=128, got {vector_size}"
-
-    @pytest.mark.asyncio
-    async def test_full_chunking_pipeline_progress_callback_invoked(self):
-        """Verify that progress_callback is invoked multiple times during pipeline."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            ctx.progress_callback.assert_called()
-            call_count = ctx.progress_callback.call_count
-            assert call_count > 0, "progress_callback should have been invoked at least once"
-
-
-class TestTaskCancellationCleanupIntegration:
-    """P0: Integration tests for task cancellation cleanup flow."""
-
-    @pytest.mark.asyncio
-    async def test_canceled_task_calls_docstore_delete(self):
-        """Verify that docStoreConn.delete is called when task is canceled."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict, is_canceled=True)
-        mock_settings = create_mock_settings()
-
-        call_log = []
-
-        def mock_thread_impl(func, *args, **kwargs):
-            # Get the actual method name from the mock
-            func_repr = repr(func)
-            call_log.append(func_repr)
-            if 'index_exist' in func_repr:
-                return True
-            if 'delete' in func_repr:
-                return {"result": "deleted"}
-            return {"result": "deleted"}
-
-        with patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_index"), \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec", side_effect=mock_thread_impl):
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle_task()
-
-            # Verify delete was called by checking the call log
-            delete_calls = [c for c in call_log if 'delete' in c]
-            assert len(delete_calls) >= 1, f"Expected at least one delete call, got: {call_log}"
-
-    @pytest.mark.asyncio
-    async def test_canceled_task_progress_callback_with_negative_one(self):
-        """Verify that progress_callback is called with -1 when task is canceled."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict, is_canceled=True)
-        mock_settings = create_mock_settings()
-
-        def mock_thread_impl(func, *args, **kwargs):
-            func_repr = repr(func)
-            if 'index_exist' in func_repr:
-                return True
-            if 'delete' in func_repr:
-                return {"result": "deleted"}
-            return {"result": "deleted"}
-
-        with patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_index"), \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec", side_effect=mock_thread_impl):
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle_task()
-
-            ctx.progress_callback.assert_called()
-            call_args_list = ctx.progress_callback.call_args_list
-            # Check for -1 in any position of the call arguments
-            has_negative_progress = False
-            for call in call_args_list:
-                # Check positional args
-                for arg in call[0]:
-                    if arg == -1:
-                        has_negative_progress = True
-                        break
-                # Check keyword args
-                if call[1].get("prog") == -1:
-                    has_negative_progress = True
-                if has_negative_progress:
-                    break
-            assert has_negative_progress, f"progress_callback should have been called with -1 progress. Calls: {call_args_list}"
-
-    @pytest.mark.asyncio
-    async def test_canceled_task_does_not_proceed_to_chunking(self):
-        """Verify that canceled task does not proceed to embedding model binding."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict, is_canceled=True)
-        mock_settings = create_mock_settings()
-
-        with patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default:
-
-            mock_index_name.return_value = "test_index"
-            mock_settings.docStoreConn.index_exist.return_value = True
-            mock_settings.docStoreConn.delete.return_value = {"result": "deleted"}
-
-            async def mock_thread_impl(func, *args, **kwargs):
-                return {"result": "deleted"}
-
-            mock_thread_exec.side_effect = mock_thread_impl
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle_task()
-
-            mock_bundle.assert_not_called()
-
-
-class TestRaptorPipelineIntegration:
-    """P1: Integration tests for the RAPTOR pipeline."""
-
-    @pytest.mark.asyncio
-    async def test_raptor_pipeline_records_task_status(self):
-        """Verify that RAPTOR pipeline records task_status."""
-        task_dict = make_task_dict(doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=["doc1", "doc2"], task_type="raptor", parser_config={"raptor": {"use_raptor": False}}, kb_parser_config={"raptor": {"use_raptor": False}})
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_kb = MagicMock()
-        mock_kb.id = "kb_test"
-        mock_kb.parser_config = {"raptor": {"use_raptor": False}}
-
-        with patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.KnowledgebaseService") as mock_kb_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.RaptorService") as mock_raptor_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_index_name.return_value = "test_index"
-            mock_kb_service.get_by_id.return_value = (True, mock_kb)
-            mock_kb_service.update_by_id.return_value = True
-            mock_raptor_service.return_value.run_raptor_for_kb = AsyncMock(return_value=([], 0, []))
-            mock_chunk_service.return_value.insert_chunks = AsyncMock(return_value=True)
-            mock_doc_service.increment_chunk_num = MagicMock()
-
-            mock_thread_exec.side_effect = mock_thread_return_none
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            recording_ctx = ctx.recording_context
-            task_status = recording_ctx.get("task_status")
-            assert task_status == "completed", f"Expected task_status='completed', got {task_status}"
-
-    @pytest.mark.asyncio
-    async def test_raptor_pipeline_enables_raptor_if_not_configured(self):
-        """Verify that RAPTOR is enabled if not already configured."""
-        task_dict = make_task_dict(doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=["doc1", "doc2"], task_type="raptor", parser_config={"raptor": {"use_raptor": False}}, kb_parser_config={"raptor": {"use_raptor": False}})
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_kb = MagicMock()
-        mock_kb.id = "kb_test"
-        mock_kb.parser_config = {"raptor": {"use_raptor": False}}
-
-        with patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.KnowledgebaseService") as mock_kb_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.RaptorService") as mock_raptor_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_index_name.return_value = "test_index"
-            mock_kb_service.get_by_id.return_value = (True, mock_kb)
-            mock_kb_service.update_by_id.return_value = True
-            mock_raptor_service.return_value.run_raptor_for_kb = AsyncMock(return_value=([], 0, []))
-            mock_chunk_service.return_value.insert_chunks = AsyncMock(return_value=True)
-            mock_doc_service.increment_chunk_num = MagicMock()
-
-            mock_thread_exec.side_effect = mock_thread_return_none
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            # Check that the kb parser_config was updated
-            mock_kb_service.update_by_id.assert_called_once()
-            call_args = mock_kb_service.update_by_id.call_args
-            update_dict = call_args[0][1]
-            assert update_dict.get("parser_config", {}).get("raptor", {}).get("use_raptor") is True, \
-                "RAPTOR should be enabled in parser_config after running"
-
-
-class TestEmbeddingModelBindingFailureIntegration:
-    """P1: Integration tests for embedding model binding failure."""
-
-    @pytest.mark.asyncio
-    async def test_embedding_binding_failure_raises_exception(self):
-        """Verify that embedding model binding failure raises an exception."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-
-        with patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default:
-
-            mock_get_config.side_effect = Exception("Model not found")
-            mock_get_default.side_effect = Exception("Model not found")
-
-            handler = TaskHandler(ctx=ctx)
-
-            with pytest.raises(Exception, match="Model not found"):
-                await handler.handle()
-
-    @pytest.mark.asyncio
-    async def test_embedding_binding_failure_calls_progress_callback(self):
-        """Verify that embedding model binding failure calls progress_callback."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-
-        with patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default:
-
-            mock_get_config.side_effect = Exception("Model not found")
-            mock_get_default.side_effect = Exception("Model not found")
-
-            handler = TaskHandler(ctx=ctx)
-
-            with pytest.raises(Exception):
-                await handler.handle()
-
-            ctx.progress_callback.assert_called()
-
-
-class TestDataflowPipelineIntegration:
-    """P2: Integration tests for the dataflow pipeline."""
-
-    @pytest.mark.asyncio
-    async def test_dataflow_pipeline_calls_dataflow_service(self):
-        """Verify that dataflow pipeline calls DataflowService.run_dataflow()."""
-        task_dict = make_task_dict(doc_id=CANVAS_DEBUG_DOC_ID, task_type="dataflow")
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-
-        with patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name", return_value="test_idx"), \
-             patch("rag.svr.task_executor_refactor.task_handler.settings") as mock_settings, \
-             patch("rag.svr.task_executor_refactor.task_handler.DataflowService") as mock_dataflow_service:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_settings.docStoreConn = MagicMock()
-            mock_settings.docStoreConn.create_idx = MagicMock()
-
-            mock_instance = MagicMock()
-            mock_instance.run_dataflow = AsyncMock(return_value=None)
-            mock_dataflow_service.return_value = mock_instance
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            mock_dataflow_service.assert_called_once()
-            mock_instance.run_dataflow.assert_called_once()
-
-
-class TestTocAsyncFlowIntegration:
-    """P2: Integration tests for TOC async flow."""
-
-    @pytest.mark.asyncio
-    async def test_toc_async_flow_creates_toc_thread(self):
-        """Verify that TOC async flow creates a TOC thread when enabled."""
-
-        task_dict = make_task_dict(parser_config={"auto_keywords": 0, "auto_questions": 0, "enable_metadata": False, "toc_extraction": True})
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.run_toc_from_text", new_callable=AsyncMock) as mock_run_toc, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls, \
-             patch("rag.svr.task_executor_refactor.post_processor.DocumentService") as mock_post_doc_service:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-            mock_run_toc.return_value = [{"title": "Test TOC", "level": 1}]
-            mock_post_doc_service.increment_chunk_num = MagicMock()
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            mock_run_toc.assert_called()
-
-        # Explicit cleanup to prevent resource leaks
-        del mock_embedding, mock_settings, mock_chunk_service
-        del mock_get_config, mock_get_default, mock_bundle, mock_file_service
-        del mock_index_name, mock_doc_service, mock_chunk_service_cls, mock_run_toc, mock_post_doc_service
-        del mock_thread_exec, mock_chunk_thread_exec
-        # Allow pending callbacks to execute
-        await asyncio.sleep(0)
-        gc.collect()
-
-    @pytest.mark.asyncio(loop_scope="function")
-    @pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning")
-    async def test_toc_async_flow_does_not_create_thread_when_disabled(self):
-        """Verify that TOC async flow does not create a thread when disabled.
-        
-        Note: This test has a known issue with resource leaks (unclosed sockets and
-        event loops) when run as part of the full test suite. The warning filter
-        above suppresses these warnings temporarily. The root cause is related to
-        asyncio.to_thread creating new event loops that are not properly cleaned up
-        by pytest-asyncio.
-        """
-
-        task_dict = make_task_dict(parser_config={"auto_keywords": 0, "auto_questions": 0, "enable_metadata": False, "toc_extraction": True})
-        task_dict["parser_config"]["toc_extraction"] = False
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.run_toc_from_text", new_callable=AsyncMock) as mock_run_toc, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            mock_run_toc.assert_not_called()
-
-        # Explicit cleanup to prevent resource leaks
-        del mock_embedding, mock_settings, mock_chunk_service
-        del mock_get_config, mock_get_default, mock_bundle, mock_file_service
-        del mock_index_name, mock_doc_service, mock_chunk_service_cls, mock_run_toc
-        del mock_thread_exec, mock_chunk_thread_exec
-        # Allow pending callbacks to execute and close event loop
-        await asyncio.sleep(0)
-        # Cancel all pending tasks
-        current_task = asyncio.current_task()
-        pending = [t for t in asyncio.all_tasks() if t is not current_task and not t.done()]
-        for task in pending:
-            task.cancel()
-        if pending:
-            await asyncio.gather(*pending, return_exceptions=True)
-        gc.collect()
-
-
-class TestRecordingContextDataFlowAssertions:
-    """P2: Integration tests for RecordingContext data flow assertions."""
-
-    @pytest.mark.asyncio
-    async def test_recording_context_captures_file_size_check(self):
-        """Verify that RecordingContext captures file_size_exceeded result."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            recording_ctx = ctx.recording_context
-            file_size_exceeded = recording_ctx.get("file_size_exceeded")
-            assert file_size_exceeded is None or file_size_exceeded is False, \
-                f"Expected file_size_exceeded to be False/None for small file, got {file_size_exceeded}"
-
-    @pytest.mark.asyncio
-    async def test_recording_context_captures_parser_id(self):
-        """Verify that RecordingContext captures parser_id from task context."""
-        task_dict = make_task_dict()
-        ctx = create_task_context(task_dict)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_chunk_service = create_mock_chunk_service()
-
-        with patch_get_storage_binary(), \
-             patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.File2DocumentService") as mock_file_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.chunk_service.thread_pool_exec") as mock_chunk_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService") as mock_doc_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.ChunkService") as mock_chunk_service_cls:
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_file_service.get_storage_address.return_value = ("bucket_test", "name_test")
-            mock_index_name.return_value = "test_index"
-            mock_doc_service.increment_chunk_num = MagicMock()
-            mock_doc_service.get_document_metadata.return_value = {}
-            mock_doc_service.update_document_metadata = MagicMock()
-            mock_chunk_service_cls.return_value = mock_chunk_service
-
-            mock_thread_exec.side_effect = mock_thread_return_binary
-            mock_chunk_thread_exec.side_effect = mock_thread_return_binary
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            recording_ctx = ctx.recording_context
-            # parser_id is available in the task context, verify task completion
-            task_status = recording_ctx.get("task_status")
-            assert task_status == "completed", f"Expected task_status='completed', got {task_status}"
-            # Verify the parser_id is accessible from the task context
-            assert ctx.parser_id == "naive", f"Expected parser_id='naive', got {ctx.parser_id}"
-
-
-class TestGraphragPipelineIntegration:
-    """P2: Integration tests for GraphRAG pipeline default configuration."""
-
-    @pytest.mark.asyncio
-    async def test_graphrag_pipeline_configures_full_defaults(self):
-        """Verify that GraphRAG configures all default parameters when not already set."""
-        task_dict = make_task_dict(doc_ids=["doc1", "doc2"], task_type="graphrag")
-        rec_ctx = RecordingContext()
-        ctx = create_task_context(task_dict, recording_context=rec_ctx)
-        mock_embedding = create_mock_embedding_model(vector_size=128)
-        mock_settings = create_mock_settings()
-        mock_kb = MagicMock()
-        mock_kb.id = "kb_test"
-        mock_kb.parser_config = {}
-
-        with patch_task_handler_settings(mock_settings), \
-             patch("rag.svr.task_executor_refactor.chunk_service.settings", mock_settings), \
-             patch("rag.svr.task_executor_refactor.task_handler.get_model_config_from_provider_instance") as mock_get_config, \
-             patch("rag.svr.task_executor_refactor.task_handler.LLMBundle") as mock_bundle, \
-             patch("rag.svr.task_executor_refactor.task_handler.get_tenant_default_model_by_type") as mock_get_default, \
-             patch("rag.svr.task_executor_refactor.task_handler.search.index_name") as mock_index_name, \
-             patch("rag.svr.task_executor_refactor.task_handler.thread_pool_exec") as mock_thread_exec, \
-             patch("rag.svr.task_executor_refactor.task_handler.KnowledgebaseService") as mock_kb_service, \
-             patch("rag.svr.task_executor_refactor.task_handler.run_graphrag_for_kb") as mock_run_graphrag, \
-             patch("rag.svr.task_executor_refactor.task_handler.DocumentService"):
-
-            mock_get_config.return_value = MagicMock()
-            mock_get_default.return_value = MagicMock()
-            mock_bundle.return_value = mock_embedding
-            mock_index_name.return_value = "test_index"
-            mock_kb_service.get_by_id.return_value = (True, mock_kb)
-            mock_kb_service.update_by_id.return_value = True
-            mock_run_graphrag.return_value = {"status": "completed"}
-
-            mock_thread_exec.side_effect = mock_thread_return_none
-
-            handler = TaskHandler(ctx=ctx)
-            await handler.handle()
-
-            # Verify update_by_id was called with full default config
-            mock_kb_service.update_by_id.assert_called_once()
-            call_args = mock_kb_service.update_by_id.call_args
-            config = call_args[0][1]["parser_config"]["graphrag"]
-            assert config["use_graphrag"] is True
-            assert "organization" in config["entity_types"]
-            assert "person" in config["entity_types"]
-            assert "geo" in config["entity_types"]
-            assert "event" in config["entity_types"]
-            assert "category" in config["entity_types"]
-            assert config["method"] == "light"
-            assert "batch_chunk_token_size" in config
-            assert "retry_attempts" in config
-            assert "retry_backoff_seconds" in config
-            assert "retry_backoff_max_seconds" in config
-            assert "build_subgraph_timeout_per_chunk_seconds" in config
-            assert "build_subgraph_min_timeout_seconds" in config
-            assert "merge_timeout_seconds" in config
-            assert "resolution_timeout_seconds" in config
-            assert "community_timeout_seconds" in config
-            assert "lock_acquire_timeout_seconds" in config, \
-                "All GraphRAG default config parameters should be present"
diff --git a/test/unit_test/rag/test_raptor_psi_tree_builder.py b/test/unit_test/rag/test_raptor_psi_tree_builder.py
index 5590d928f2..aa4a16101d 100644
--- a/test/unit_test/rag/test_raptor_psi_tree_builder.py
+++ b/test/unit_test/rag/test_raptor_psi_tree_builder.py
@@ -15,8 +15,10 @@
 #
 
 import importlib
+import os
 import sys
 import types
+from unittest.mock import MagicMock
 
 import pytest
 
@@ -113,10 +115,28 @@ def raptor_module(monkeypatch):
     monkeypatch.setitem(sys.modules, "common.token_utils", token_utils_module)
     monkeypatch.setitem(sys.modules, "rag.graphrag.utils", graphrag_utils_module)
     monkeypatch.setitem(sys.modules, "common.misc_utils", misc_utils_module)
-    monkeypatch.delitem(sys.modules, "rag.raptor", raising=False)
-    module = importlib.import_module("rag.raptor")
+    # Create stub parent packages and load raptor directly via spec_from_file_location
+    # to bypass rag/advanced_rag/__init__.py (which triggers ES connection etc.).
+    _test_dir = os.path.dirname(__file__)
+    _rag_adv_kc_dir = os.path.normpath(os.path.join(_test_dir, "../../../rag/advanced_rag/knowlege_compile"))
+    _rag_adv = types.ModuleType("rag.advanced_rag")
+    _rag_adv.__path__ = [os.path.normpath(os.path.join(_test_dir, "../../../rag/advanced_rag"))]
+    _rag_adv.__package__ = "rag.advanced_rag"
+    monkeypatch.setitem(sys.modules, "rag.advanced_rag", _rag_adv)
+    _rag_adv_kc = types.ModuleType("rag.advanced_rag.knowlege_compile")
+    _rag_adv_kc.__path__ = [_rag_adv_kc_dir]
+    _rag_adv_kc.__package__ = "rag.advanced_rag.knowlege_compile"
+    monkeypatch.setitem(sys.modules, "rag.advanced_rag.knowlege_compile", _rag_adv_kc)
+    monkeypatch.delitem(sys.modules, "rag.advanced_rag.knowlege_compile.raptor", raising=False)
+    _raptor_spec = importlib.util.spec_from_file_location(
+        "rag.advanced_rag.knowlege_compile.raptor",
+        os.path.join(_rag_adv_kc_dir, "raptor.py"),
+    )
+    module = importlib.util.module_from_spec(_raptor_spec)
+    sys.modules["rag.advanced_rag.knowlege_compile.raptor"] = module
+    _raptor_spec.loader.exec_module(module)
     yield module
-    monkeypatch.delitem(sys.modules, "rag.raptor", raising=False)
+    monkeypatch.delitem(sys.modules, "rag.advanced_rag.knowlege_compile.raptor", raising=False)
 
 
 class FakeChatModel:
@@ -372,7 +392,7 @@ async def test_psi_tree_builder_materializes_rebalanced_summary_layers_without_u
     def fail_umap(*args, **kwargs):
         raise AssertionError("Psi tree builder must use original embeddings, not UMAP")
 
-    monkeypatch.setattr(raptor_module.umap, "UMAP", fail_umap)
+    monkeypatch.setattr("umap.UMAP", fail_umap)
     raptor = _make_raptor(raptor_module, max_cluster=2)
 
     chunks, layers = await raptor(_chunks(), random_state=0)