Feat: add gbrain compile template for session/memory data (#16613)

2026-07-04 01:29:35 +08:00 · 2026-07-03 18:22:29 +08:00
parent ffc4d29a06
commit 706fa4e87a
3 changed files with 200 additions and 1 deletions
--- a/api/db/init_data/compilation_templates/session_essence.yaml
+++ b/api/db/init_data/compilation_templates/session_essence.yaml
@@ -0,0 +1,50 @@
+kind: session_essence
+display_name: Session Essence — Cross-source entity synopses from conversations
+config:
+  kind: knowledge_graph
+  entity:
+    description: >-
+      You are a robust entity and fact extractor for conversational data.
+    fields:
+      - type: person
+        description: A person mentioned or participating in the conversation.
+        rule: |
+          - Full name preferred (e.g., "Alice Zhang", not "A. Zhang").
+          - Max length: 60 characters.
+      - type: org
+        description: Organization, company, team, or department.
+        rule: |
+          - Use the official name when possible (e.g., "Acme Corp").
+          - Max length: 80 characters.
+      - type: topic
+        description: Discussed topic, theme, or subject area.
+        rule: |
+          - Use the phrasing from the conversation (e.g., "API migration").
+          - Max length: 80 characters.
+      - type: fact
+        description: A verifiable factual statement extracted from the conversation.
+        rule: |
+          - Must be a single proposition (subject + predicate + object).
+          - Must be directly supported by the source text.
+          - Do not include opinions, speculation, or hypotheticals.
+        fact_fields:
+          - subject
+          - predicate
+          - object
+          - polarity
+          - confidence
+  relation:
+    description: >-
+      You are an expert in extracting semantic relations from conversations.
+    fields:
+      - type: mentions
+        description: One entity references or brings up another.
+        rule: |
+          - Direction from referrer to referred: (A mentions B).
+  synthesis:
+    enabled: true
+    compile_kwd: "essence"
+    example: |
+      Write a 3-5 sentence executive summary about the entity.
+      Synthesize what multiple sources collectively say.
+      Output ONLY the paragraph. No headers, no JSON, no preamble.
--- a/api/db/init_data/compilation_templates/session_graph.yaml
+++ b/api/db/init_data/compilation_templates/session_graph.yaml
@@ -0,0 +1,67 @@
+kind: session_graph
+display_name: Session Graph — Knowledge graph from conversations
+config:
+  kind: knowledge_graph
+  entity:
+    description: >-
+      You are a robust entity and fact extractor for conversational data.
+    fields:
+      - type: person
+        description: A person mentioned or participating in the conversation.
+        rule: |
+          - Full name preferred (e.g., "Alice Zhang", not "A. Zhang").
+          - Max length: 60 characters.
+      - type: org
+        description: Organization, company, team, or department.
+        rule: |
+          - Use the official name when possible (e.g., "Acme Corp").
+          - Max length: 80 characters.
+      - type: topic
+        description: Discussed topic, theme, or subject area.
+        rule: |
+          - Use the phrasing from the conversation (e.g., "API migration").
+          - Max length: 80 characters.
+      - type: fact
+        description: A verifiable factual statement extracted from the conversation.
+        rule: |
+          - Must be a single proposition (subject + predicate + object).
+          - Must be directly supported by the source text.
+          - Do not include opinions, speculation, or hypotheticals.
+        fact_fields:
+          - subject
+          - predicate
+          - object
+          - polarity
+          - confidence
+  relation:
+    description: >-
+      You are an expert in extracting semantic relations from conversations.
+    fields:
+      - type: mentions
+        description: One entity references or brings up another.
+        rule: |
+          - Direction from referrer to referred: (A mentions B).
+      - type: decides
+        description: A decision or resolution made during the conversation.
+        rule: |
+          - Direction from decision-maker to decision: (A decides B).
+      - type: assigns
+        description: Task or responsibility assigned to someone.
+        rule: |
+          - Direction from assigner to assignee: (A assigns B to C).
+      - type: references
+        description: Reference to a prior topic, document, or event.
+        rule: |
+          - Direction from current to referenced: (A references B).
+      - type: other
+        description: Any meaningful relation not covered by the above types.
+        rule: |
+          - Provide an explicit label in a "relation_label" field.
+  global_rules: ''
+  synthesis:
+    enabled: true
+    compile_kwd: "artifact_page"
+    example: |
+      - Build a wiki article covering the entity's role in the conversations.
+      - Include a summary paragraph and sections for key decisions and actions.
+      - Use [[entity_name]] for cross-references to related entities.
--- a/rag/svr/task_executor_refactor/chunk_post_processor.py
+++ b/rag/svr/task_executor_refactor/chunk_post_processor.py
@@ -913,6 +913,11 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
    ``handler._load_chunks_for_doc``) and fans each batch out to every
    configured non-artifact template, flushing accumulators through
    ``merge_compiled_structures`` at :data:`DOC_STRUCTURE_MERGE_MAX_DOCS`.
+
+    After extract+merge, each template with ``synthesis.enabled`` runs
+    ``wiki_plan_from_reduction`` + ``wiki_refine_from_plan`` to generate
+    synthesis output (wiki pages, essence paragraphs, etc.). ``compile_kwd``
+    and the REFINE ``example`` prompt are read from the template's ``synthesis`` config.
    """
    ctx = handler._task_context
    template_ids = _parser_config_compilation_template_ids(ctx.parser_config, ctx.tenant_id)
@@ -1079,7 +1084,7 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
                progress_cb(msg=f"  merge flush ({len(accumulators[template_id])} docs) for template ({idx + 1}/{total})")
                await _flush(template_id)

-    for idx, (template_id, _parser_cfg) in enumerate(active_templates):
+    for idx, (template_id, parser_cfg) in enumerate(active_templates):
        if ctx.has_canceled_func(ctx.id):
            raise TaskCanceledException(f"Task {ctx.id} was cancelled during document knowledge compilation")
        await _flush(template_id)
@@ -1087,6 +1092,83 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
        ctx.recording_context.record(f"document_structure_compile:{template_id}", agg)
        progress_cb(msg=f"Document knowledge compilation done ({idx + 1}/{total}): {agg}")

+        # ── Synthesis phase ──────────────────────────────────────────────
+        # If the template has synthesis.enabled, run wiki PLAN+REFINE
+        # to generate output (wiki page, essence paragraph, etc.).
+        synthesis_cfg = (parser_cfg or {}).get("synthesis") or {}
+        if synthesis_cfg.get("enabled"):
+            example = synthesis_cfg.get("example")
+            compile_kwd = synthesis_cfg.get("compile_kwd", "artifact_page")
+            plan_cfg = synthesis_cfg.get("plan") or {}
+
+            # Reserved for future wiki_plan_from_reduction extension:
+            # entity_type_filter, mention_count_threshold, top_n
+            if plan_cfg:
+                logging.debug(
+                    "synthesis: template %s plan config %r reserved for future use",
+                    template_id, plan_cfg,
+                )
+
+            if ctx.has_canceled_func(ctx.id):
+                raise TaskCanceledException(
+                    f"Task {ctx.id} was cancelled before synthesis PLAN"
+                )
+
+            if not example:
+                logging.warning(
+                    "synthesis: template %s has synthesis.enabled but no example; skipping",
+                    template_id,
+                )
+            else:
+                try:
+                    from rag.advanced_rag.knowlege_compile.wiki import (
+                        wiki_plan_from_reduction,
+                        wiki_refine_from_plan,
+                    )
+
+                    progress_cb(
+                        msg=f"Synthesis PLAN for template {template_id} (kind={compile_kwd}) ..."
+                    )
+                    plan = await wiki_plan_from_reduction(
+                        chat_mdl=chat_mdl_by_tid[template_id],
+                        embd_mdl=embedding_model,
+                        tenant_id=ctx.tenant_id,
+                        kb_id=ctx.kb_id,
+                        callback=progress_cb,
+                    )
+                    if ctx.has_canceled_func(ctx.id):
+                        raise TaskCanceledException(
+                            f"Task {ctx.id} was cancelled after synthesis PLAN"
+                        )
+
+                    if not plan or not plan.get("pages"):
+                        progress_cb(
+                            msg=f"Synthesis: no pages planned for template {template_id}."
+                        )
+                    else:
+                        progress_cb(
+                            msg=f"Synthesis REFINE for template {template_id} ({len(plan['pages'])} page(s)) ..."
+                        )
+                        pages = await wiki_refine_from_plan(
+                            chat_mdl=chat_mdl_by_tid[template_id],
+                            embd_mdl=embedding_model,
+                            tenant_id=ctx.tenant_id,
+                            kb_id=ctx.kb_id,
+                            callback=progress_cb,
+                            example=example,
+                        )
+                        # Overwrite compile_kwd on every returned page so the synthesis type
+                        # is tracked in ES. NOTE(review): confirm pages are persisted after this.
+                        for p in pages or []:
+                            p["compile_kwd"] = compile_kwd
+                        progress_cb(
+                            msg=f"Synthesis done: {len(pages or [])} {compile_kwd} page(s) written."
+                        )
+                except Exception:
+                    logging.exception(
+                        "synthesis: failed for template %s", template_id,
+                    )
+

 async def run_document_post_chunking_if_last(
    handler,