Feat: add gbrain compile template for session/memory data (#16613)

This commit is contained in:
Yingfeng
2026-07-03 18:22:29 +08:00
committed by GitHub
parent ffc4d29a06
commit 706fa4e87a
3 changed files with 200 additions and 1 deletions

View File

@@ -0,0 +1,50 @@
# Compile template for conversational (session/memory) data.
# Declares four entity types (person, org, topic, fact) and a single
# "mentions" relation, and turns on the synthesis step with a prompt that
# asks for a 3-5 sentence summary paragraph per entity.
kind: session_essence
display_name: Session Essence — Cross-source entity synopses from conversations
config:
kind: knowledge_graph
# Entity extraction prompt and the entity types to extract. Each type has a
# description and free-text rules (naming preference, maximum length).
entity:
description: >-
You are a robust entity and fact extractor for conversational data.
fields:
- type: person
description: A person mentioned or participating in the conversation.
rule: |
- Full name preferred (e.g., "Alice Zhang", not "A. Zhang").
- Max length: 60 characters.
- type: org
description: Organization, company, team, or department.
rule: |
- Use the official name when possible (e.g., "Acme Corp").
- Max length: 80 characters.
- type: topic
description: Discussed topic, theme, or subject area.
rule: |
- Use the phrasing from the conversation (e.g., "API migration").
- Max length: 80 characters.
# Facts are restricted to single, source-supported propositions; opinions,
# speculation and hypotheticals are excluded by the rules below.
- type: fact
description: A verifiable factual statement extracted from the conversation.
rule: |
- Must be a single proposition (subject + predicate + object).
- Must be directly supported by the source text.
- Do not include opinions, speculation, or hypotheticals.
# Field names listed for a fact. NOTE(review): the value formats of
# polarity/confidence are not specified here - confirm against the extractor.
fact_fields:
- subject
- predicate
- object
- polarity
- confidence
# Relation extraction prompt and relation types; only "mentions" is defined.
relation:
description: >-
You are an expert in extracting semantic relations from conversations.
fields:
- type: mentions
description: One entity references or brings up another.
rule: |
- Direction from referrer to referred: (A mentions B).
# Synthesis settings. run_document_structure_compile reads `enabled`,
# `compile_kwd` and `example`: with `enabled` true and a non-empty `example`,
# the example text is handed to the REFINE step and `compile_kwd` is set on
# every page that step returns. An empty/missing `example` skips synthesis.
synthesis:
enabled: true
compile_kwd: "essence"
example: |
Write a 3-5 sentence executive summary about the entity.
Synthesize what multiple sources collectively say.
Output ONLY the paragraph. No headers, no JSON, no preamble.

View File

@@ -0,0 +1,67 @@
# Compile template for conversational (session/memory) data.
# Declares four entity types (person, org, topic, fact) and five relation
# types (mentions, decides, assigns, references, other), and turns on the
# synthesis step with a prompt that asks for a wiki-style article per entity.
kind: session_graph
display_name: Session Graph — Knowledge graph from conversations
config:
kind: knowledge_graph
# Entity extraction prompt and the entity types to extract. Each type has a
# description and free-text rules (naming preference, maximum length).
entity:
description: >-
You are a robust entity and fact extractor for conversational data.
fields:
- type: person
description: A person mentioned or participating in the conversation.
rule: |
- Full name preferred (e.g., "Alice Zhang", not "A. Zhang").
- Max length: 60 characters.
- type: org
description: Organization, company, team, or department.
rule: |
- Use the official name when possible (e.g., "Acme Corp").
- Max length: 80 characters.
- type: topic
description: Discussed topic, theme, or subject area.
rule: |
- Use the phrasing from the conversation (e.g., "API migration").
- Max length: 80 characters.
# Facts are restricted to single, source-supported propositions; opinions,
# speculation and hypotheticals are excluded by the rules below.
- type: fact
description: A verifiable factual statement extracted from the conversation.
rule: |
- Must be a single proposition (subject + predicate + object).
- Must be directly supported by the source text.
- Do not include opinions, speculation, or hypotheticals.
# Field names listed for a fact. NOTE(review): the value formats of
# polarity/confidence are not specified here - confirm against the extractor.
fact_fields:
- subject
- predicate
- object
- polarity
- confidence
# Relation extraction prompt and relation types. Every type states the
# direction of the edge in its rule text.
relation:
description: >-
You are an expert in extracting semantic relations from conversations.
fields:
- type: mentions
description: One entity references or brings up another.
rule: |
- Direction from referrer to referred: (A mentions B).
- type: decides
description: A decision or resolution made during the conversation.
rule: |
- Direction from decision-maker to decision: (A decides B).
# NOTE(review): the example below names three participants (A, B, C) while
# the direction names two (assigner -> assignee); confirm which two become
# the relation endpoints.
- type: assigns
description: Task or responsibility assigned to someone.
rule: |
- Direction from assigner to assignee: (A assigns B to C).
- type: references
description: Reference to a prior topic, document, or event.
rule: |
- Direction from current to referenced: (A references B).
# Catch-all type; its rule asks for the label in a "relation_label" field.
- type: other
description: Any meaningful relation not covered by the above types.
rule: |
- Provide an explicit label in a "relation_label" field.
# No global rules are set (empty string).
global_rules: ''
# Synthesis settings. run_document_structure_compile reads `enabled`,
# `compile_kwd` and `example`: with `enabled` true and a non-empty `example`,
# the example text is handed to the REFINE step and `compile_kwd` is set on
# every page that step returns. "artifact_page" is also the value that code
# falls back to when `compile_kwd` is omitted.
synthesis:
enabled: true
compile_kwd: "artifact_page"
example: |
- Build a wiki article covering the entity's role in the conversations.
- Include a summary paragraph and sections for key decisions and actions.
- Use [[entity_name]] for cross-references to related entities.

View File

@@ -913,6 +913,11 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
``handler._load_chunks_for_doc``) and fans each batch out to every
configured non-artifact template, flushing accumulators through
``merge_compiled_structures`` at :data:`DOC_STRUCTURE_MERGE_MAX_DOCS`.
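After extract+merge, each template whose config sets ``synthesis.enabled``
runs ``wiki_plan_from_reduction`` + ``wiki_refine_from_plan`` to
generate synthesis output (wiki pages, essence paragraphs, etc.).
``compile_kwd`` and the REFINE prompt (``example``) are read from the
template's ``synthesis`` config. A template with no ``example`` is skipped
with a warning, and errors raised during PLAN/REFINE are logged rather
than re-raised.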
"""
ctx = handler._task_context
template_ids = _parser_config_compilation_template_ids(ctx.parser_config, ctx.tenant_id)
@@ -1079,7 +1084,7 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
progress_cb(msg=f" merge flush ({len(accumulators[template_id])} docs) for template ({idx + 1}/{total})")
await _flush(template_id)
for idx, (template_id, _parser_cfg) in enumerate(active_templates):
for idx, (template_id, parser_cfg) in enumerate(active_templates):
if ctx.has_canceled_func(ctx.id):
raise TaskCanceledException(f"Task {ctx.id} was cancelled during document knowledge compilation")
await _flush(template_id)
@@ -1087,6 +1092,83 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
ctx.recording_context.record(f"document_structure_compile:{template_id}", agg)
progress_cb(msg=f"Document knowledge compilation done ({idx + 1}/{total}): {agg}")
# ── Synthesis phase ──────────────────────────────────────────────
# If the template has synthesis.enabled, run wiki PLAN+REFINE
# to generate output (wiki page, essence paragraph, etc.).
synthesis_cfg = (parser_cfg or {}).get("synthesis") or {}
if synthesis_cfg.get("enabled"):
example = synthesis_cfg.get("example")
compile_kwd = synthesis_cfg.get("compile_kwd", "artifact_page")
plan_cfg = synthesis_cfg.get("plan") or {}
# Reserved for future wiki_plan_from_reduction extension:
# entity_type_filter, mention_count_threshold, top_n
if plan_cfg:
logging.debug(
"synthesis: template %s plan config %r reserved for future use",
template_id, plan_cfg,
)
if ctx.has_canceled_func(ctx.id):
raise TaskCanceledException(
f"Task {ctx.id} was cancelled before synthesis PLAN"
)
if not example:
logging.warning(
"synthesis: template %s has synthesis.enabled but no example; skipping",
template_id,
)
else:
try:
from rag.advanced_rag.knowlege_compile.wiki import (
wiki_plan_from_reduction,
wiki_refine_from_plan,
)
progress_cb(
msg=f"Synthesis PLAN for template {template_id} (kind={compile_kwd}) ..."
)
plan = await wiki_plan_from_reduction(
chat_mdl=chat_mdl_by_tid[template_id],
embd_mdl=embedding_model,
tenant_id=ctx.tenant_id,
kb_id=ctx.kb_id,
callback=progress_cb,
)
if ctx.has_canceled_func(ctx.id):
raise TaskCanceledException(
f"Task {ctx.id} was cancelled after synthesis PLAN"
)
if not plan or not plan.get("pages"):
progress_cb(
msg=f"Synthesis: no pages planned for template {template_id}."
)
else:
progress_cb(
msg=f"Synthesis REFINE for template {template_id} ({len(plan['pages'])} page(s)) ..."
)
pages = await wiki_refine_from_plan(
chat_mdl=chat_mdl_by_tid[template_id],
embd_mdl=embedding_model,
tenant_id=ctx.tenant_id,
kb_id=ctx.kb_id,
callback=progress_cb,
example=example,
)
# Overwrite compile_kwd on every output page so the
# synthesis type is tracked correctly in ES.
for p in pages or []:
p["compile_kwd"] = compile_kwd
progress_cb(
msg=f"Synthesis done: {len(pages or [])} {compile_kwd} page(s) written."
)
except Exception:
logging.exception(
"synthesis: failed for template %s", template_id,
)
async def run_document_post_chunking_if_last(
handler,