mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-05 10:58:34 +08:00
### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring

---------

Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
84 lines
2.1 KiB
JSON
84 lines
2.1 KiB
JSON
{
|
|
"components": {
|
|
"File": {
|
|
"obj":{
|
|
"component_name": "File",
|
|
"params": {
|
|
}
|
|
},
|
|
"downstream": ["Parser:0"],
|
|
"upstream": []
|
|
},
|
|
"Parser:0": {
|
|
"obj": {
|
|
"component_name": "Parser",
|
|
"params": {
|
|
"setups": {
|
|
"pdf": {
|
|
"parse_method": "deepdoc",
|
|
"vlm_name": "",
|
|
"lang": "Chinese",
|
|
"suffix": [
|
|
"pdf"
|
|
],
|
|
"output_format": "json"
|
|
},
|
|
"spreadsheet": {
|
|
"suffix": [
|
|
"xls",
|
|
"xlsx",
|
|
"csv"
|
|
],
|
|
"output_format": "html"
|
|
},
|
|
"word": {
|
|
"suffix": [
|
|
"doc",
|
|
"docx"
|
|
],
|
|
"output_format": "json"
|
|
},
|
|
"markdown": {
|
|
"suffix": [
|
|
"md",
|
|
"markdown"
|
|
],
|
|
"output_format": "text"
|
|
},
|
|
"text": {
|
|
"suffix": ["txt"],
|
|
"output_format": "json"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"downstream": ["TokenChunker:0"],
|
|
"upstream": ["File"]
|
|
},
|
|
"TokenChunker:0": {
|
|
"obj": {
|
|
"component_name": "TokenChunker",
|
|
"params": {
|
|
"chunk_token_size": 512,
|
|
"delimiters": ["\r\n"],
|
|
"overlapped_percent": 0
|
|
}
|
|
},
|
|
"downstream": ["TitleChunker:0"],
|
|
"upstream": ["Parser:0"]
|
|
},
|
|
"TitleChunker:0": {
|
|
"obj": {
|
|
"component_name": "TitleChunker",
|
|
"params": {
|
|
"levels": [["^#[^#]"], ["^##[^#]"], ["^###[^#]"], ["^####[^#]"]],
|
|
"hierarchy": 2
|
|
}
|
|
},
|
|
"downstream": [],
|
|
"upstream": ["TokenChunker:0"]
|
|
}
|
|
},
|
|
"path": []
|
|
}
|