mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 01:29:35 +08:00
Feat: add gbrain compile template for session/memory data (#16613)
This commit is contained in:
50
api/db/init_data/compilation_templates/session_essence.yaml
Normal file
50
api/db/init_data/compilation_templates/session_essence.yaml
Normal file
@@ -0,0 +1,50 @@
|
||||
kind: session_essence
|
||||
display_name: Session Essence — Cross-source entity synopses from conversations
|
||||
config:
|
||||
kind: knowledge_graph
|
||||
entity:
|
||||
description: >-
|
||||
You are a robust entity and fact extractor for conversational data.
|
||||
fields:
|
||||
- type: person
|
||||
description: A person mentioned or participating in the conversation.
|
||||
rule: |
|
||||
- Full name preferred (e.g., "Alice Zhang", not "A. Zhang").
|
||||
- Max length: 60 characters.
|
||||
- type: org
|
||||
description: Organization, company, team, or department.
|
||||
rule: |
|
||||
- Use the official name when possible (e.g., "Acme Corp").
|
||||
- Max length: 80 characters.
|
||||
- type: topic
|
||||
description: Discussed topic, theme, or subject area.
|
||||
rule: |
|
||||
- Use the phrasing from the conversation (e.g., "API migration").
|
||||
- Max length: 80 characters.
|
||||
- type: fact
|
||||
description: A verifiable factual statement extracted from the conversation.
|
||||
rule: |
|
||||
- Must be a single proposition (subject + predicate + object).
|
||||
- Must be directly supported by the source text.
|
||||
- Do not include opinions, speculation, or hypotheticals.
|
||||
fact_fields:
|
||||
- subject
|
||||
- predicate
|
||||
- object
|
||||
- polarity
|
||||
- confidence
|
||||
relation:
|
||||
description: >-
|
||||
You are an expert in extracting semantic relations from conversations.
|
||||
fields:
|
||||
- type: mentions
|
||||
description: One entity references or brings up another.
|
||||
rule: |
|
||||
- Direction from referrer to referred: (A mentions B).
|
||||
synthesis:
|
||||
enabled: true
|
||||
compile_kwd: "essence"
|
||||
example: |
|
||||
Write a 3-5 sentence executive summary about the entity.
|
||||
Synthesize what multiple sources collectively say.
|
||||
Output ONLY the paragraph. No headers, no JSON, no preamble.
|
||||
67
api/db/init_data/compilation_templates/session_graph.yaml
Normal file
67
api/db/init_data/compilation_templates/session_graph.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
kind: session_graph
|
||||
display_name: Session Graph — Knowledge graph from conversations
|
||||
config:
|
||||
kind: knowledge_graph
|
||||
entity:
|
||||
description: >-
|
||||
You are a robust entity and fact extractor for conversational data.
|
||||
fields:
|
||||
- type: person
|
||||
description: A person mentioned or participating in the conversation.
|
||||
rule: |
|
||||
- Full name preferred (e.g., "Alice Zhang", not "A. Zhang").
|
||||
- Max length: 60 characters.
|
||||
- type: org
|
||||
description: Organization, company, team, or department.
|
||||
rule: |
|
||||
- Use the official name when possible (e.g., "Acme Corp").
|
||||
- Max length: 80 characters.
|
||||
- type: topic
|
||||
description: Discussed topic, theme, or subject area.
|
||||
rule: |
|
||||
- Use the phrasing from the conversation (e.g., "API migration").
|
||||
- Max length: 80 characters.
|
||||
- type: fact
|
||||
description: A verifiable factual statement extracted from the conversation.
|
||||
rule: |
|
||||
- Must be a single proposition (subject + predicate + object).
|
||||
- Must be directly supported by the source text.
|
||||
- Do not include opinions, speculation, or hypotheticals.
|
||||
fact_fields:
|
||||
- subject
|
||||
- predicate
|
||||
- object
|
||||
- polarity
|
||||
- confidence
|
||||
relation:
|
||||
description: >-
|
||||
You are an expert in extracting semantic relations from conversations.
|
||||
fields:
|
||||
- type: mentions
|
||||
description: One entity references or brings up another.
|
||||
rule: |
|
||||
- Direction from referrer to referred: (A mentions B).
|
||||
- type: decides
|
||||
description: A decision or resolution made during the conversation.
|
||||
rule: |
|
||||
- Direction from decision-maker to decision: (A decides B).
|
||||
- type: assigns
|
||||
description: Task or responsibility assigned to someone.
|
||||
rule: |
|
||||
- Direction from assigner to assignee: (A assigns B to C).
|
||||
- type: references
|
||||
description: Reference to a prior topic, document, or event.
|
||||
rule: |
|
||||
- Direction from current to referenced: (A references B).
|
||||
- type: other
|
||||
description: Any meaningful relation not covered by the above types.
|
||||
rule: |
|
||||
- Provide an explicit label in a "relation_label" field.
|
||||
global_rules: ''
|
||||
synthesis:
|
||||
enabled: true
|
||||
compile_kwd: "artifact_page"
|
||||
example: |
|
||||
- Build a wiki article covering the entity's role in the conversations.
|
||||
- Include a summary paragraph and sections for key decisions and actions.
|
||||
- Use [[entity_name]] for cross-references to related entities.
|
||||
@@ -913,6 +913,11 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
|
||||
``handler._load_chunks_for_doc``) and fans each batch out to every
|
||||
configured non-artifact template, flushing accumulators through
|
||||
``merge_compiled_structures`` at :data:`DOC_STRUCTURE_MERGE_MAX_DOCS`.
|
||||
|
||||
After extract+merge, if any template has ``synthesis.enabled``,
|
||||
runs ``wiki_plan_from_reduction`` + ``wiki_refine_from_plan`` to
|
||||
generate synthesis output (wiki pages, essence paragraphs, etc.).
|
||||
The ``compile_kwd`` and the REFINE prompt (``synthesis.example``) are read from each template's ``synthesis`` config.
|
||||
"""
|
||||
ctx = handler._task_context
|
||||
template_ids = _parser_config_compilation_template_ids(ctx.parser_config, ctx.tenant_id)
|
||||
@@ -1079,7 +1084,7 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
|
||||
progress_cb(msg=f" merge flush ({len(accumulators[template_id])} docs) for template ({idx + 1}/{total})")
|
||||
await _flush(template_id)
|
||||
|
||||
for idx, (template_id, _parser_cfg) in enumerate(active_templates):
|
||||
for idx, (template_id, parser_cfg) in enumerate(active_templates):
|
||||
if ctx.has_canceled_func(ctx.id):
|
||||
raise TaskCanceledException(f"Task {ctx.id} was cancelled during document knowledge compilation")
|
||||
await _flush(template_id)
|
||||
@@ -1087,6 +1092,83 @@ async def run_document_structure_compile(handler, embedding_model: LLMBundle) ->
|
||||
ctx.recording_context.record(f"document_structure_compile:{template_id}", agg)
|
||||
progress_cb(msg=f"Document knowledge compilation done ({idx + 1}/{total}): {agg}")
|
||||
|
||||
# ── Synthesis phase ──────────────────────────────────────────────
|
||||
# If the template has synthesis.enabled, run wiki PLAN+REFINE
|
||||
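# to generate output (wiki page, essence paragraph, etc.).
# NOTE(review): the broad ``except Exception`` around PLAN+REFINE below would also catch the TaskCanceledException raised after PLAN if that class derives from Exception — confirm cancellation is meant to propagate.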
|
||||
synthesis_cfg = (parser_cfg or {}).get("synthesis") or {}
|
||||
if synthesis_cfg.get("enabled"):
|
||||
example = synthesis_cfg.get("example")
|
||||
compile_kwd = synthesis_cfg.get("compile_kwd", "artifact_page")
|
||||
plan_cfg = synthesis_cfg.get("plan") or {}
|
||||
|
||||
# Reserved for future wiki_plan_from_reduction extension:
|
||||
# entity_type_filter, mention_count_threshold, top_n
|
||||
if plan_cfg:
|
||||
logging.debug(
|
||||
"synthesis: template %s plan config %r reserved for future use",
|
||||
template_id, plan_cfg,
|
||||
)
|
||||
|
||||
if ctx.has_canceled_func(ctx.id):
|
||||
raise TaskCanceledException(
|
||||
f"Task {ctx.id} was cancelled before synthesis PLAN"
|
||||
)
|
||||
|
||||
if not example:
|
||||
logging.warning(
|
||||
"synthesis: template %s has synthesis.enabled but no example; skipping",
|
||||
template_id,
|
||||
)
|
||||
else:
|
||||
try:
|
||||
from rag.advanced_rag.knowlege_compile.wiki import (
|
||||
wiki_plan_from_reduction,
|
||||
wiki_refine_from_plan,
|
||||
)
|
||||
|
||||
progress_cb(
|
||||
msg=f"Synthesis PLAN for template {template_id} (kind={compile_kwd}) ..."
|
||||
)
|
||||
plan = await wiki_plan_from_reduction(
|
||||
chat_mdl=chat_mdl_by_tid[template_id],
|
||||
embd_mdl=embedding_model,
|
||||
tenant_id=ctx.tenant_id,
|
||||
kb_id=ctx.kb_id,
|
||||
callback=progress_cb,
|
||||
)
|
||||
if ctx.has_canceled_func(ctx.id):
|
||||
raise TaskCanceledException(
|
||||
f"Task {ctx.id} was cancelled after synthesis PLAN"
|
||||
)
|
||||
|
||||
if not plan or not plan.get("pages"):
|
||||
progress_cb(
|
||||
msg=f"Synthesis: no pages planned for template {template_id}."
|
||||
)
|
||||
else:
|
||||
progress_cb(
|
||||
msg=f"Synthesis REFINE for template {template_id} ({len(plan['pages'])} page(s)) ..."
|
||||
)
|
||||
pages = await wiki_refine_from_plan(
|
||||
chat_mdl=chat_mdl_by_tid[template_id],
|
||||
embd_mdl=embedding_model,
|
||||
tenant_id=ctx.tenant_id,
|
||||
kb_id=ctx.kb_id,
|
||||
callback=progress_cb,
|
||||
example=example,
|
||||
)
|
||||
# Overwrite compile_kwd on every output page so the
|
||||
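# synthesis type is tracked correctly in ES.
# NOTE(review): this only mutates the dicts returned by wiki_refine_from_plan; confirm the pages are persisted after this point, otherwise the stored compile_kwd is unchanged.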
|
||||
for p in pages or []:
|
||||
p["compile_kwd"] = compile_kwd
|
||||
progress_cb(
|
||||
msg=f"Synthesis done: {len(pages or [])} {compile_kwd} page(s) written."
|
||||
)
|
||||
except Exception:
|
||||
logging.exception(
|
||||
"synthesis: failed for template %s", template_id,
|
||||
)
|
||||
|
||||
|
||||
async def run_document_post_chunking_if_last(
|
||||
handler,
|
||||
|
||||
Reference in New Issue
Block a user