diff --git a/agent/templates/ingestion_pipeline_Book.json b/agent/templates/ingestion_pipeline_book.json
similarity index 100%
rename from agent/templates/ingestion_pipeline_Book.json
rename to agent/templates/ingestion_pipeline_book.json
diff --git a/agent/templates/ingestion_pipeline_General.json b/agent/templates/ingestion_pipeline_general.json
similarity index 100%
rename from agent/templates/ingestion_pipeline_General.json
rename to agent/templates/ingestion_pipeline_general.json
diff --git a/agent/templates/ingestion_pipeline_Laws.json b/agent/templates/ingestion_pipeline_laws.json
similarity index 100%
rename from agent/templates/ingestion_pipeline_Laws.json
rename to agent/templates/ingestion_pipeline_laws.json
diff --git a/agent/templates/ingestion_pipeline_Manual.json b/agent/templates/ingestion_pipeline_manual.json
similarity index 100%
rename from agent/templates/ingestion_pipeline_Manual.json
rename to agent/templates/ingestion_pipeline_manual.json
diff --git a/agent/templates/ingestion_pipeline_One.json b/agent/templates/ingestion_pipeline_one.json
similarity index 100%
rename from agent/templates/ingestion_pipeline_One.json
rename to agent/templates/ingestion_pipeline_one.json
diff --git a/agent/templates/ingestion_pipeline_Paper.json b/agent/templates/ingestion_pipeline_paper.json
similarity index 100%
rename from agent/templates/ingestion_pipeline_Paper.json
rename to agent/templates/ingestion_pipeline_paper.json
diff --git a/agent/templates/ingestion_pipeline_Resume.json b/agent/templates/ingestion_pipeline_resume.json
similarity index 98%
rename from agent/templates/ingestion_pipeline_Resume.json
rename to agent/templates/ingestion_pipeline_resume.json
index 7b8d989957..cb35eb2043 100644
--- a/agent/templates/ingestion_pipeline_Resume.json
+++ b/agent/templates/ingestion_pipeline_resume.json
@@ -242,13 +242,14 @@
"include_heading_content": false,
"levels": [
[
- "^\\s*(?i:(?:\\d+[\\.\\)]\\s*)?(?:EDUCATION|ACADEMIC\\s*BACKGROUND|ACADEMIC\\s*HISTORY|EDUCATIONAL\\s*BACKGROUND|RELEVANT\\s*COURSEWORK|COURSEWORK|EXPERIENCE|WORK\\s*EXPERIENCE|PROFESSIONAL\\s*EXPERIENCE|RELEVANT\\s*EXPERIENCE|EMPLOYMENT\\s*HISTORY|CAREER\\s*HISTORY|INTERNSHIP\\s*EXPERIENCE|PROJECTS|PROJECT\\s*EXPERIENCE|ACADEMIC\\s*PROJECTS|PROFESSIONAL\\s*PROJECTS|SKILLS|TECHNICAL\\s*SKILLS|CORE\\s*COMPETENCIES|COMPETENCIES|QUALIFICATIONS|SUMMARY\\s*OF\\s*QUALIFICATIONS|CERTIFICATIONS|LICENSES|CERTIFICATES|AWARDS|HONORS|HONOURS|ACHIEVEMENTS|PUBLICATIONS|RESEARCH|RESEARCH\\s*EXPERIENCE|LEADERSHIP|LEADERSHIP\\s*EXPERIENCE|ACTIVITIES|EXTRACURRICULAR\\s*ACTIVITIES|ACTIVITIES\\s*(?:&|AND)\\s*SKILLS|INVOLVEMENT|CAMPUS\\s*INVOLVEMENT|VOLUNTEER\\s*EXPERIENCE|VOLUNTEERING|COMMUNITY\\s*SERVICE|LANGUAGES|INTERESTS|HOBBIES|PROFILE|PROFESSIONAL\\s*PROFILE|SUMMARY|PROFESSIONAL\\s*SUMMARY|CAREER\\s*SUMMARY|OBJECTIVE|CAREER\\s*OBJECTIVE|PERSONAL\\s*INFORMATION|CONTACT\\s*INFORMATION|ADDITIONAL\\s*INFORMATION|TRAINING))\\s*[:\uff1a]?\\s*$"
+ "^\\s*(?i:(?:\\d+[\\.\\)]\\s*)?(?:EDUCATION|ACADEMIC\\s*BACKGROUND|ACADEMIC\\s*HISTORY|EDUCATIONAL\\s*BACKGROUND|RELEVANT\\s*COURSEWORK|COURSEWORK|EXPERIENCE|WORK\\s*EXPERIENCE|PROFESSIONAL\\s*EXPERIENCE|RELEVANT\\s*EXPERIENCE|EMPLOYMENT\\s*HISTORY|CAREER\\s*HISTORY|INTERNSHIP\\s*EXPERIENCE|PROJECTS|PROJECT\\s*EXPERIENCE|ACADEMIC\\s*PROJECTS|PROFESSIONAL\\s*PROJECTS|SKILLS|TECHNICAL\\s*SKILLS|CORE\\s*COMPETENCIES|COMPETENCIES|QUALIFICATIONS|SUMMARY\\s*OF\\s*QUALIFICATIONS|CERTIFICATIONS|LICENSES|CERTIFICATES|AWARDS|HONORS|HONOURS|ACHIEVEMENTS|PUBLICATIONS|RESEARCH|RESEARCH\\s*EXPERIENCE|LEADERSHIP|LEADERSHIP\\s*EXPERIENCE|ACTIVITIES|EXTRACURRICULAR\\s*ACTIVITIES|ACTIVITIES\\s*(?:&|AND)\\s*SKILLS|INVOLVEMENT|CAMPUS\\s*INVOLVEMENT|VOLUNTEER\\s*EXPERIENCE|VOLUNTEERING|COMMUNITY\\s*SERVICE|LANGUAGES|INTERESTS|HOBBIES|PROFILE|PROFESSIONAL\\s*PROFILE|SUMMARY|PROFESSIONAL\\s*SUMMARY|CAREER\\s*SUMMARY|OBJECTIVE|CAREER\\s*OBJECTIVE|PERSONAL\\s*INFORMATION|CONTACT\\s*INFORMATION|ADDITIONAL\\s*INFORMATION|TRAINING))\\s*[:\uff1a]?\\s*$"
],
[
"^\\s*(?:\\d+[\\.\u3001\\)]\\s*)?(?:\u6559\u80b2\u80cc\u666f|\u6559\u80b2\u7ecf\u5386|\u5b66\u5386\u80cc\u666f|\u5b66\u672f\u80cc\u666f|\u6280\u672f\u80cc\u666f|\u5de5\u4f5c\u7ecf\u5386|\u5de5\u4f5c\u7ecf\u9a8c|\u5b9e\u4e60\u7ecf\u5386|\u9879\u76ee\u7ecf\u5386|\u9879\u76ee\u7ecf\u9a8c|\u79d1\u7814\u7ecf\u5386|\u7814\u7a76\u7ecf\u5386|\u6821\u56ed\u7ecf\u5386|\u5b9e\u8df5\u7ecf\u5386|\u4e13\u4e1a\u7ecf\u5386|\u804c\u4e1a\u7ecf\u5386|\u6280\u80fd|\u4e13\u4e1a\u6280\u80fd|\u6280\u80fd\u7279\u957f|\u6838\u5fc3\u6280\u80fd|\u6280\u672f\u6808|\u4e2a\u4eba\u6280\u80fd|\u5de5\u4f5c\u6280\u80fd|\u804c\u4e1a\u6280\u80fd|\u6280\u80fd\u4e0e\u8bc4\u4ef7|\u6280\u80fd\u4e0e\u81ea\u6211\u8bc4\u4ef7|\u5de5\u4f5c\u6280\u80fd\u4e0e\u81ea\u6211\u8bc4\u4ef7|\u804c\u4e1a\u6280\u80fd\u4e0e\u81ea\u6211\u8bc4\u4ef7|\u8bc1\u4e66|\u8d44\u683c\u8bc1\u4e66|\u804c\u4e1a\u8d44\u683c|\u8d44\u8d28\u8bc1\u4e66|\u83b7\u5956\u60c5\u51b5|\u83b7\u5956\u7ecf\u5386|\u8363\u8a89|\u8363\u8a89\u5956\u9879|\u5956\u9879|\u79d1\u7814\u6210\u679c|\u8bba\u6587\u53d1\u8868|\u53d1\u8868\u8bba\u6587|\u9886\u5bfc\u7ecf\u5386|\u5b66\u751f\u5de5\u4f5c|\u6821\u56ed\u6d3b\u52a8|\u793e\u56e2\u7ecf\u5386|\u6d3b\u52a8\u7ecf\u5386|\u5fd7\u613f\u7ecf\u5386|\u5fd7\u613f\u670d\u52a1|\u793e\u4f1a\u5b9e\u8df5|\u8bed\u8a00\u80fd\u529b|\u8bed\u8a00|\u81ea\u6211\u8bc4\u4ef7|\u4e2a\u4eba\u8bc4\u4ef7|\u81ea\u6211\u603b\u7ed3|\u4e2a\u4eba\u603b\u7ed3|\u4e2a\u4eba\u4f18\u52bf|\u4e2a\u4eba\u7b80\u4ecb|\u4e2a\u4eba\u4fe1\u606f|\u57fa\u672c\u4fe1\u606f|\u8054\u7cfb\u65b9\u5f0f|\u6c42\u804c\u610f\u5411|\u5e94\u8058\u610f\u5411|\u804c\u4e1a\u76ee\u6807|\u6c42\u804c\u76ee\u6807|\u5174\u8da3\u7231\u597d|\u5174\u8da3\u7279\u957f|\u57f9\u8bad\u7ecf\u5386|\u5176\u4ed6\u4fe1\u606f|\u9644\u52a0\u4fe1\u606f)\\s*[:\uff1a]?\\s*$"
]
],
- "method": "hierarchy"
+ "method": "hierarchy",
+ "root_chunk_as_heading": true
}
},
"upstream": [
@@ -299,16 +300,6 @@
"target": "TitleChunker:FlatMiceFix",
"targetHandle": "end"
},
- {
- "data": {
- "isHovered": false
- },
- "id": "xy-edge__TitleChunker:FlatMiceFixstart-Extractor:ThreeDrinksActend",
- "source": "TitleChunker:FlatMiceFix",
- "sourceHandle": "start",
- "target": "Extractor:ThreeDrinksAct",
- "targetHandle": "end"
- },
{
"data": {
"isHovered": false
@@ -321,6 +312,19 @@
"targetHandle": "end",
"type": "buttonEdge",
"zIndex": 1001
+ },
+ {
+ "data": {
+ "isHovered": false
+ },
+ "id": "xy-edge__TitleChunker:FlatMiceFixstart-Extractor:ThreeDrinksActend",
+ "markerEnd": "logo",
+ "source": "TitleChunker:FlatMiceFix",
+ "sourceHandle": "start",
+ "target": "Extractor:ThreeDrinksAct",
+ "targetHandle": "end",
+ "type": "buttonEdge",
+ "zIndex": 1001
}
],
"nodes": [
@@ -331,7 +335,7 @@
},
"id": "File",
"measured": {
- "height": 50,
+ "height": 49,
"width": 200
},
"position": {
@@ -460,7 +464,7 @@
"dragging": false,
"id": "Parser:HipSignsRhyme",
"measured": {
- "height": 198,
+ "height": 197,
"width": 200
},
"position": {
@@ -489,12 +493,12 @@
"dragging": false,
"id": "Tokenizer:KindHandsWin",
"measured": {
- "height": 114,
+ "height": 113,
"width": 200
},
"position": {
- "x": 876.4654525205967,
- "y": 189.1906747329592
+ "x": 883.0243372012395,
+ "y": 156.39625132974524
},
"selected": false,
"sourcePosition": "right",
@@ -514,6 +518,7 @@
}
},
"promote_first_heading_to_root": false,
+ "root_chunk_as_heading": true,
"rules": [
{
"levels": [
@@ -537,14 +542,14 @@
"dragging": false,
"id": "TitleChunker:FlatMiceFix",
"measured": {
- "height": 74,
+ "height": 73,
"width": 200
},
"position": {
"x": 572.7908769627791,
"y": 141.55515313482098
},
- "selected": false,
+ "selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "chunkerNode"
@@ -580,12 +585,12 @@
"dragging": false,
"id": "Extractor:ThreeDrinksAct",
"measured": {
- "height": 90,
+ "height": 89,
"width": 200
},
"position": {
- "x": 583.3659219536569,
- "y": 274.7600100230409
+ "x": 623.8123774842874,
+ "y": 236.49984938595793
},
"selected": false,
"sourcePosition": "right",
diff --git a/rag/flow/chunker/title_chunker/common.py b/rag/flow/chunker/title_chunker/common.py
index 95a19fc3ed..89981a83de 100644
--- a/rag/flow/chunker/title_chunker/common.py
+++ b/rag/flow/chunker/title_chunker/common.py
@@ -41,6 +41,7 @@ class TitleChunkerParam(ProcessParamBase):
self.levels = []
self.hierarchy = None
self.include_heading_content = False
+ self.root_chunk_as_heading = False
def check(self):
if self.method in {"hierarchy", "group"}:
@@ -240,13 +241,13 @@ class BaseTitleChunker(ABC):
# chunk box is defined by merged source positions and the text payload
# is normalized by removing parser tags.
if self.from_upstream.output_format in ["markdown", "text", "html"]:
- return [
+ chunks = [
{"text": "".join(record["text"] + "\n" for record in records)}
for records in record_groups
if records
]
- return [
+ chunks = [
(
{
"text": RAGFlowPdfParser.remove_tag("".join(record["text"] + "\n" for record in records)),
@@ -264,6 +265,17 @@ class BaseTitleChunker(ABC):
for records in record_groups
if records
]
+
+ if self.param.root_chunk_as_heading and len(chunks) > 1:
+ root_chunk = chunks[0]
+ root_text = root_chunk.get("text", "")
+
+ for ck in chunks[1:]:
+ ck['text'] = root_text + "\n" + ck.get("text", "")
+
+ return chunks[1:]
+
+ return chunks
async def set_chunks(self, chunks):
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index a2dea44bcd..5c0ff38c61 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -1513,6 +1513,9 @@ Example: Virtual Hosted Style`,
includeHeadingContent: 'Include heading content',
includeHeadingContentTip:
'When enabled, content directly under a heading is kept as its own chunk. Child chunks keep only the heading path.',
+ rootAsHeading: 'Use root as H0 heading',
+ rootAsHeadingTip:
+ 'Treat the root node as an H0 heading when building the hierarchy',
hierarchyTip: `Build a heading tree and produce self-contained chunks, each carrying its full ancestor heading path (e.g. Part 1 › Chapter 3 › Section 2 + body text).\n
Best for: Documents with independent, structurally significant sections — such as legal statutes, regulations, contracts, and technical specifications — where each chunk must be identifiable by its structural position even without surrounding context.`,
groupTip: `Split the document flat at a chosen heading level and automatically merge adjacent small sections to preserve content continuity. No parent-heading path is injected.\n
diff --git a/web/src/pages/agent/constant/pipeline.tsx b/web/src/pages/agent/constant/pipeline.tsx
index 307dab82dc..8271838f18 100644
--- a/web/src/pages/agent/constant/pipeline.tsx
+++ b/web/src/pages/agent/constant/pipeline.tsx
@@ -333,6 +333,7 @@ export const initialTitleChunkerValues = {
method: 'hierarchy',
hierarchy: Hierarchy.H3,
include_heading_content: false,
+ root_chunk_as_heading: false,
rules: rules,
};
@@ -340,6 +341,7 @@ export const initialGroupValues = {
method: 'group',
hierarchy: '0',
include_heading_content: false,
+ root_chunk_as_heading: false,
rules: rules,
};
diff --git a/web/src/pages/agent/form/title-chunker-form/hook.ts b/web/src/pages/agent/form/title-chunker-form/hook.ts
index fca7ce9093..481d425f46 100644
--- a/web/src/pages/agent/form/title-chunker-form/hook.ts
+++ b/web/src/pages/agent/form/title-chunker-form/hook.ts
@@ -128,6 +128,7 @@ function transformApiResponseToForm(
method,
hierarchy,
include_heading_content: Boolean(apiData.include_heading_content),
+ root_chunk_as_heading: Boolean(apiData.root_chunk_as_heading),
rules,
};
}
diff --git a/web/src/pages/agent/form/title-chunker-form/index.tsx b/web/src/pages/agent/form/title-chunker-form/index.tsx
index b800c4f023..0f6723577d 100644
--- a/web/src/pages/agent/form/title-chunker-form/index.tsx
+++ b/web/src/pages/agent/form/title-chunker-form/index.tsx
@@ -29,6 +29,7 @@ import { transformApiResponseToForm, useDynamicHierarchyOptions } from './hook';
type FormModeValues = {
hierarchy?: string;
include_heading_content?: boolean;
+ root_chunk_as_heading?: boolean;
rules: Array<{ levels: Array<{ expression: string }> }>;
};
@@ -60,6 +61,7 @@ export const FormSchema = z.object({
method: z.enum(['hierarchy', 'group']),
hierarchy: z.string().optional(),
include_heading_content: z.boolean().optional(),
+ root_chunk_as_heading: z.boolean().optional(),
rules: rulesSchema,
});
@@ -221,12 +223,14 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
hierarchyModeValues.current = {
hierarchy: hierarchyValue,
include_heading_content: form.getValues('include_heading_content'),
+ root_chunk_as_heading: form.getValues('root_chunk_as_heading'),
rules: rulesValue,
};
} else if (currentMode === 'group') {
groupValues.current = {
hierarchy: hierarchyValue,
include_heading_content: form.getValues('include_heading_content'),
+ root_chunk_as_heading: form.getValues('root_chunk_as_heading'),
rules: rulesValue,
};
}
@@ -239,6 +243,7 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
method: 'group',
hierarchy: modeValues?.hierarchy ?? '0',
include_heading_content: false,
+ root_chunk_as_heading: false,
rules: modeValues?.rules || initialGroupValues.rules,
});
} else {
@@ -251,12 +256,14 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
hierarchy: modeValues.hierarchy || defaultHierarchy,
include_heading_content:
modeValues.include_heading_content || false,
+ root_chunk_as_heading: modeValues.root_chunk_as_heading || false,
rules: modeValues.rules,
});
} else {
const newModeValues: FormModeValues = {
hierarchy: defaultHierarchy,
include_heading_content: false,
+ root_chunk_as_heading: false,
rules: JSON.parse(JSON.stringify(initialTitleChunkerValues.rules)),
};
@@ -264,6 +271,7 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
method: method,
hierarchy: defaultHierarchy,
include_heading_content: newModeValues.include_heading_content,
+ root_chunk_as_heading: newModeValues.root_chunk_as_heading,
rules: newModeValues.rules,
});
}
@@ -323,23 +331,46 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
{method === 'hierarchy' && (
-
- {(field) => (
- {
- field.onChange?.(checked);
- }}
- />
- )}
-
+ <>
+
+ {(field) => (
+ {
+ field.onChange?.(checked);
+ }}
+ />
+ )}
+
+
+
+ {(field) => (
+ {
+ field.onChange?.(checked);
+ }}
+ />
+ )}
+
+ >
)}
{/* {method === 'group' ? (
diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts
index 5b21780741..d77948d93a 100644
--- a/web/src/pages/agent/utils.ts
+++ b/web/src/pages/agent/utils.ts
@@ -338,6 +338,7 @@ function transformTitleChunkerParams(params: TitleChunkerFormSchemaType) {
method: params.method,
hierarchy: Number(params.hierarchy || 0),
include_heading_content: Boolean(params.include_heading_content),
+ root_chunk_as_heading: Boolean(params.root_chunk_as_heading),
levels,
};
}