diff --git a/agent/templates/advanced_ingestion_pipeline.json b/agent/templates/advanced_ingestion_pipeline.json index 08066f0b1c..02b58c2e22 100644 --- a/agent/templates/advanced_ingestion_pipeline.json +++ b/agent/templates/advanced_ingestion_pipeline.json @@ -12,316 +12,142 @@ }, "canvas_type": "Ingestion Pipeline", "canvas_category": "dataflow_canvas", - "dsl": { - "components": { - "File": { - "obj": { - "component_name": "File", - "params": {} - }, - "downstream": [ - "Parser:HipSignsRhyme" - ], - "upstream": [] - }, - "Parser:HipSignsRhyme": { - "obj": { - "component_name": "Parser", - "params": { - "outputs": { - "html": { - "type": "string", - "value": "" + "dsl": { + "components": { + "Extractor:CleverPianosInvite": { + "downstream": [ + "Tokenizer:ShyBalloonsSmell" + ], + "obj": { + "component_name": "Extractor", + "params": { + "field_name": "summary", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } }, - "json": { - "type": "Array", - "value": [] - }, - "markdown": { - "type": "string", - "value": "" - }, - "text": { - "type": "string", - "value": "" - } - }, - "setups": { - "pdf": { - "output_format": "markdown", - "suffix": [ - "pdf" - ], - "parse_method": "DeepDOC" - }, - "spreadsheet": { - "output_format": "html", - "suffix": [ - "xls", - "xlsx", - "csv" - ] - }, - "image": { - "output_format": "text", - "suffix": [ - "jpg", - "jpeg", - "png", - "gif" - ], - "parse_method": "ocr" - }, - "email": { - "output_format": "text", - "suffix": [ - "eml", - "msg" - ], - "fields": [ - "from", - "to", - "cc", - "bcc", - "date", - "subject", - "body", - "attachments" - ] - }, - "text&markdown": { - "output_format": "text", - "suffix": [ - "md", - "markdown", - "mdx", - "txt" - ] - }, - "word": { - "output_format": "json", - "suffix": [ - "doc", - "docx" - ] - }, - "slides": { - 
"output_format": "json", - "suffix": [ - "pptx" - ] - } + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": [ + { + "content": "Text to Summarize:\n{Extractor:SunnyCooksSpend@chunks}", + "role": "user" + } + ], + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 } - } - }, - "downstream": [ - "TokenChunker:KindDingosJam" - ], - "upstream": [ - "File" - ] - }, - "TokenChunker:KindDingosJam": { - "obj": { - "component_name": "TokenChunker", - "params": { - "chunk_token_size": 512, - "delimiters": [ - "\n" - ], - "outputs": { - "chunks": { - "type": "Array", - "value": [] - } - }, - "overlapped_percent": 0.002 - } - }, - "downstream": [ - "Extractor:NineTiesSin" - ], - "upstream": [ - "Parser:HipSignsRhyme" - ] - }, - "Extractor:NineTiesSin": { - "obj": { - "component_name": "Extractor", - "params": { - "field_name": "summary", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": [ - { - "content": "Text to Summarize:\n{TokenChunker:KindDingosJam@chunks}", - "role": "user" - } - ], - "sys_prompt": "Act as a precise summarizer. 
Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - } - }, - "downstream": [ - "Extractor:TastyPointsLay" - ], - "upstream": [ - "TokenChunker:KindDingosJam" - ] - }, - "Extractor:TastyPointsLay": { - "obj": { - "component_name": "Extractor", - "params": { - "field_name": "keywords", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": [ - { - "content": "Text Content:\n{Extractor:NineTiesSin@chunks}\n", - "role": "user" - } - ], - "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nExtract the most important keywords/phrases of a given piece of text content.\n\nRequirements\n- Summarize the text content, and give the top 5 important keywords/phrases.\n- The keywords MUST be in the same language as the given piece of text content.\n- The keywords are delimited by ENGLISH COMMA.\n- Output keywords ONLY.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - } - }, - "downstream": [ - "Extractor:BlueResultsWink" - ], - "upstream": [ - "Extractor:NineTiesSin" - ] - }, - "Extractor:BlueResultsWink": { - "obj": { - "component_name": "Extractor", - "params": { - "field_name": "questions", - "frequencyPenaltyEnabled": false, - 
"frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": [ - { - "content": "Text Content:\n\n{Extractor:TastyPointsLay@chunks}\n", - "role": "user" - } - ], - "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nPropose 3 questions about a given piece of text content.\n\nRequirements\n- Understand and summarize the text content, and propose the top 3 important questions.\n- The questions SHOULD NOT have overlapping meanings.\n- The questions SHOULD cover the main content of the text as much as possible.\n- The questions MUST be in the same language as the given piece of text content.\n- One question per line.\n- Output questions ONLY.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - } - }, - "downstream": [ - "Extractor:CuteBusesBet" - ], - "upstream": [ - "Extractor:TastyPointsLay" - ] - }, - "Extractor:CuteBusesBet": { - "obj": { - "component_name": "Extractor", - "params": { - "field_name": "metadata", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": [ - { - "content": "Content: \n\n{Extractor:BlueResultsWink@chunks}", - "role": "user" - } - ], - "sys_prompt": "Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. 
If no important structured information is found, output an empty JSON object: {}.\n\nImportant structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - } - }, - "downstream": [ - "Tokenizer:LegalHorsesCheer" - ], - "upstream": [ - "Extractor:BlueResultsWink" - ] - }, - "Tokenizer:LegalHorsesCheer": { - "obj": { - "component_name": "Tokenizer", - "params": { - "fields": "text", - "filename_embd_weight": 0.1, - "outputs": {}, - "search_method": [ - "embedding", - "full_text" - ] - } - }, - "downstream": [], - "upstream": [ - "Extractor:CuteBusesBet" - ] - } - }, - "globals": {}, - "graph": { - "nodes": [ - { - "data": { - "label": "File", - "name": "File" }, - "dragging": false, - "id": "File", - "measured": { - "height": 48, - "width": 200 - }, - "position": { - "x": -301.4128436198721, - "y": 375.86728431988394 - }, - "selected": false, - "sourcePosition": "left", - "targetPosition": "right", - "type": "beginNode" + "upstream": [ + "Extractor:SunnyCooksSpend" + ] }, - { - "data": { - "form": { + "Extractor:EasyToesFail": { + "downstream": [ + "Extractor:SunnyCooksSpend" + ], + "obj": { + "component_name": "Extractor", + "params": { + "field_name": "summary", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": [ + { + "content": "Text to Summarize:\n{TokenChunker:SixtyShirtsFeel@chunks}", + "role": "user" + } + ], + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. 
Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 + } + }, + "upstream": [ + "TokenChunker:SixtyShirtsFeel" + ] + }, + "Extractor:SunnyCooksSpend": { + "downstream": [ + "Extractor:CleverPianosInvite" + ], + "obj": { + "component_name": "Extractor", + "params": { + "field_name": "keywords", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": [ + { + "content": "Text Content\n{Extractor:EasyToesFail@chunks}", + "role": "user" + } + ], + "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nExtract the most important keywords/phrases of a given piece of text content.\n\nRequirements\n- Summarize the text content, and give the top 5 important keywords/phrases.\n- The keywords MUST be in the same language as the given piece of text content.\n- The keywords are delimited by ENGLISH COMMA.\n- Output keywords ONLY.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 + } + }, + "upstream": [ + "Extractor:EasyToesFail" + ] + }, + "File": { + "downstream": [ + "Parser:HipSignsRhyme" + ], + "obj": { + "component_name": "File", + "params": {} + }, + "upstream": [] + }, + "Parser:HipSignsRhyme": { + "downstream": [ + "TokenChunker:SixtyShirtsFeel" + ], + "obj": { + "component_name": "Parser", + "params": { "outputs": { "html": { 
"type": "string", @@ -340,22 +166,23 @@ "value": "" } }, - "setups": [ - { - "fileFormat": "pdf", - "output_format": "markdown", - "parse_method": "DeepDOC" + "setups": { + "doc": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "doc" + ] }, - { - "fileFormat": "spreadsheet", - "output_format": "html" + "docx": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "docx" + ], + "vlm": {} }, - { - "fileFormat": "image", - "output_format": "text", - "parse_method": "ocr" - }, - { + "email": { "fields": [ "from", "to", @@ -366,227 +193,130 @@ "body", "attachments" ], - "fileFormat": "email", - "output_format": "text" + "output_format": "text", + "preprocess": "main_content", + "suffix": [ + "eml", + "msg" + ] }, - { - "fileFormat": "text&markdown", - "output_format": "text" + "html": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "htm", + "html" + ] }, - { - "fileFormat": "word", - "output_format": "json" + "image": { + "output_format": "text", + "parse_method": "ocr", + "preprocess": "main_content", + "suffix": [ + "jpg", + "jpeg", + "png", + "gif" + ], + "system_prompt": "" }, - { - "fileFormat": "slides", - "output_format": "json" + "markdown": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "md", + "markdown", + "mdx" + ], + "vlm": {} + }, + "pdf": { + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "pdf" + ], + "vlm": {} + }, + "slides": { + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "pptx", + "ppt" + ] + }, + "spreadsheet": { + "output_format": "html", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "xls", + "xlsx", + "csv" + ], + "vlm": {} + }, + "text&code": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "txt", + "py", + "js", + "java", + "c", + "cpp", + "h", + "php", + "go", + "ts", 
+ "sh", + "cs", + "kt", + "sql" + ] } - ] - }, - "label": "Parser", - "name": "Parser" + } + } }, - "dragging": false, - "id": "Parser:HipSignsRhyme", - "measured": { - "height": 56, - "width": 200 - }, - "position": { - "x": -297.12089864837964, - "y": 532.2084591689336 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "parserNode" + "upstream": [ + "File" + ] }, - { - "data": { - "form": { + "TokenChunker:SixtyShirtsFeel": { + "downstream": [ + "Extractor:EasyToesFail" + ], + "obj": { + "component_name": "TokenChunker", + "params": { + "children_delimiters": [], "chunk_token_size": 512, - "delimiters": [ - { - "value": "\n" - } - ], + "delimiter_mode": "token_size", + "delimiters": [], + "image_context_size": 0, "outputs": { "chunks": { "type": "Array", "value": [] } }, - "overlapped_percent": 0.2 - }, - "label": "TokenChunker", - "name": "Token Chunker" + "overlapped_percent": 0, + "table_context_size": 0 + } }, - "dragging": false, - "id": "TokenChunker:KindDingosJam", - "measured": { - "height": 80, - "width": 200 - }, - "position": { - "x": 7.288275851418206, - "y": 371.19722568785704 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "chunkerNode" + "upstream": [ + "Parser:HipSignsRhyme" + ] }, - { - "data": { - "form": { - "field_name": "summary", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": "Text to Summarize:\n{TokenChunker:KindDingosJam@chunks}", - "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. 
Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - }, - "label": "Extractor", - "name": "Summarization" - }, - "dragging": false, - "id": "Extractor:NineTiesSin", - "measured": { - "height": 84, - "width": 200 - }, - "position": { - "x": 9.537168313582939, - "y": 461.26662127765564 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "contextNode" - }, - { - "data": { - "form": { - "field_name": "keywords", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": "Text Content:\n{Extractor:NineTiesSin@chunks}\n", - "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nExtract the most important keywords/phrases of a given piece of text content.\n\nRequirements\n- Summarize the text content, and give the top 5 important keywords/phrases.\n- The keywords MUST be in the same language as the given piece of text content.\n- The keywords are delimited by ENGLISH COMMA.\n- Output keywords ONLY.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - }, - "label": "Extractor", - "name": "Auto Keywords" - }, - "dragging": false, - "id": "Extractor:TastyPointsLay", - "measured": { - "height": 84, - "width": 200 - }, - "position": { - "x": 7.473032067783009, - "y": 533.0519245332371 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "contextNode" - 
}, - { - "data": { - "form": { - "field_name": "questions", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": "Text Content:\n\n{Extractor:TastyPointsLay@chunks}\n", - "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nPropose 3 questions about a given piece of text content.\n\nRequirements\n- Understand and summarize the text content, and propose the top 3 important questions.\n- The questions SHOULD NOT have overlapping meanings.\n- The questions SHOULD cover the main content of the text as much as possible.\n- The questions MUST be in the same language as the given piece of text content.\n- One question per line.\n- Output questions ONLY.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - }, - "label": "Extractor", - "name": "Auto Questions" - }, - "dragging": false, - "id": "Extractor:BlueResultsWink", - "measured": { - "height": 84, - "width": 200 - }, - "position": { - "x": 2.905601749296892, - "y": 617.0420857433816 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "contextNode" - }, - { - "data": { - "form": { - "field_name": "metadata", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": {}, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": "Content: \n\n{Extractor:BlueResultsWink@chunks}", - "sys_prompt": "Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. 
If no important structured information is found, output an empty JSON object: {}.\n\nImportant structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - }, - "label": "Extractor", - "name": "Generate Metadata" - }, - "dragging": false, - "id": "Extractor:CuteBusesBet", - "measured": { - "height": 84, - "width": 200 - }, - "position": { - "x": 327.16477358029204, - "y": 374.11630810111944 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "contextNode" - }, - { - "data": { - "form": { + "Tokenizer:ShyBalloonsSmell": { + "downstream": [], + "obj": { + "component_name": "Tokenizer", + "params": { "fields": "text", "filename_embd_weight": 0.1, "outputs": {}, @@ -594,135 +324,408 @@ "embedding", "full_text" ] - }, - "label": "Tokenizer", - "name": "Indexer" - }, - "dragging": false, - "id": "Tokenizer:LegalHorsesCheer", - "measured": { - "height": 120, - "width": 200 - }, - "position": { - "x": 345.50155210663667, - "y": 533.0511852267863 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "tokenizerNode" - }, - { - "id": "Note:CruelSidesStick", - "type": "noteNode", - "position": { - "x": -29, - "y": 765 - }, - "data": { - "label": "Note", - "name": "Add more attributes", - "form": { - "text": "Using LLM to generate summaries, keywords, Q&A, and metadata." 
} }, - "sourcePosition": "right", - "targetPosition": "left", - "dragHandle": ".note-drag-handle", - "measured": { - "width": 281, - "height": 130 - }, - "width": 281, - "height": 130, - "resizing": false + "upstream": [ + "Extractor:CleverPianosInvite" + ] } - ], - "edges": [ - { - "data": { - "isHovered": false + }, + "globals": { + "sys.history": [] + }, + "graph": { + "edges": [ + { + "data": { + "isHovered": false + }, + "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", + "source": "File", + "sourceHandle": "start", + "target": "Parser:HipSignsRhyme", + "targetHandle": "end" }, - "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", - "source": "File", - "sourceHandle": "start", - "target": "Parser:HipSignsRhyme", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "data": { + "isHovered": false + }, + "id": "xy-edge__Parser:HipSignsRhymestart-TokenChunker:SixtyShirtsFeelend", + "source": "Parser:HipSignsRhyme", + "sourceHandle": "start", + "target": "TokenChunker:SixtyShirtsFeel", + "targetHandle": "end" }, - "id": "xy-edge__TokenChunker:KindDingosJamstart-Extractor:NineTiesSinend", - "source": "TokenChunker:KindDingosJam", - "sourceHandle": "start", - "target": "Extractor:NineTiesSin", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "data": { + "isHovered": false + }, + "id": "xy-edge__TokenChunker:SixtyShirtsFeelstart-Extractor:EasyToesFailend", + "source": "TokenChunker:SixtyShirtsFeel", + "sourceHandle": "start", + "target": "Extractor:EasyToesFail", + "targetHandle": "end" }, - "id": "xy-edge__Extractor:NineTiesSinstart-Extractor:TastyPointsLayend", - "source": "Extractor:NineTiesSin", - "sourceHandle": "start", - "target": "Extractor:TastyPointsLay", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "data": { + "isHovered": false + }, + "id": "xy-edge__Extractor:EasyToesFailstart-Extractor:SunnyCooksSpendend", + "selected": false, + "source": "Extractor:EasyToesFail", + "sourceHandle": 
"start", + "target": "Extractor:SunnyCooksSpend", + "targetHandle": "end" }, - "id": "xy-edge__Extractor:TastyPointsLaystart-Extractor:BlueResultsWinkend", - "source": "Extractor:TastyPointsLay", - "sourceHandle": "start", - "target": "Extractor:BlueResultsWink", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "data": { + "isHovered": false + }, + "id": "xy-edge__Extractor:SunnyCooksSpendstart-Extractor:CleverPianosInviteend", + "source": "Extractor:SunnyCooksSpend", + "sourceHandle": "start", + "target": "Extractor:CleverPianosInvite", + "targetHandle": "end" }, - "id": "xy-edge__Extractor:BlueResultsWinkstart-Extractor:CuteBusesBetend", - "source": "Extractor:BlueResultsWink", - "sourceHandle": "start", - "target": "Extractor:CuteBusesBet", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "data": { + "isHovered": false + }, + "id": "xy-edge__Extractor:CleverPianosInvitestart-Tokenizer:ShyBalloonsSmellend", + "source": "Extractor:CleverPianosInvite", + "sourceHandle": "start", + "target": "Tokenizer:ShyBalloonsSmell", + "targetHandle": "end" + } + ], + "nodes": [ + { + "data": { + "label": "File", + "name": "File" + }, + "id": "File", + "measured": { + "height": 49, + "width": 200 + }, + "position": { + "x": 50, + "y": 200 + }, + "sourcePosition": "left", + "targetPosition": "right", + "type": "beginNode" }, - "id": "xy-edge__Extractor:CuteBusesBetstart-Tokenizer:LegalHorsesCheerend", - "source": "Extractor:CuteBusesBet", - "sourceHandle": "start", - "target": "Tokenizer:LegalHorsesCheer", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "data": { + "form": { + "outputs": { + "html": { + "type": "string", + "value": "" + }, + "json": { + "type": "Array", + "value": [] + }, + "markdown": { + "type": "string", + "value": "" + }, + "text": { + "type": "string", + "value": "" + } + }, + "setups": [ + { + "fileFormat": "pdf", + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": 
"main_content" + }, + { + "fileFormat": "spreadsheet", + "output_format": "html", + "parse_method": "DeepDOC", + "preprocess": "main_content" + }, + { + "fileFormat": "image", + "output_format": "text", + "parse_method": "ocr", + "preprocess": "main_content", + "system_prompt": "" + }, + { + "fields": [ + "from", + "to", + "cc", + "bcc", + "date", + "subject", + "body", + "attachments" + ], + "fileFormat": "email", + "output_format": "text", + "preprocess": "main_content" + }, + { + "fileFormat": "markdown", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "text&code", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "html", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "doc", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "docx", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "slides", + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content" + } + ] + }, + "label": "Parser", + "name": "Parser_0" + }, + "dragging": false, + "id": "Parser:HipSignsRhyme", + "measured": { + "height": 57, + "width": 200 + }, + "position": { + "x": 316.99524094206413, + "y": 195.39629819663406 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "parserNode" }, - "id": "xy-edge__Parser:HipSignsRhymestart-TokenChunker:KindDingosJamend", - "markerEnd": "logo", - "source": "Parser:HipSignsRhyme", - "sourceHandle": "start", - "style": { - "stroke": "rgba(91, 93, 106, 1)", - "strokeWidth": 1 + { + "data": { + "form": { + "children_delimiters": [], + "chunk_token_size": 512, + "delimiter_mode": "token_size", + "delimiters": [ + { + "value": "\n" + } + ], + "image_table_context_window": 0, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "overlapped_percent": 0 + }, + "label": "TokenChunker", + "name": "Token Chunker_0" + }, + 
"id": "TokenChunker:SixtyShirtsFeel", + "measured": { + "height": 73, + "width": 200 + }, + "position": { + "x": 616.9952409420641, + "y": 195.39629819663406 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "chunkerNode" }, - "target": "TokenChunker:KindDingosJam", - "targetHandle": "end", - "type": "buttonEdge", - "zIndex": 1001 - } - ] + { + "data": { + "form": { + "field_name": "summary", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": "Text to Summarize:\n{TokenChunker:SixtyShirtsFeel@chunks}", + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. 
Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 + }, + "label": "Extractor", + "name": "Summarization" + }, + "dragging": false, + "id": "Extractor:EasyToesFail", + "measured": { + "height": 89, + "width": 200 + }, + "position": { + "x": 606.9117864444606, + "y": 295.54747604679164 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "contextNode" + }, + { + "data": { + "form": { + "field_name": "keywords", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": "Text Content\n{Extractor:EasyToesFail@chunks}", + "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nExtract the most important keywords/phrases of a given piece of text content.\n\nRequirements\n- Summarize the text content, and give the top 5 important keywords/phrases.\n- The keywords MUST be in the same language as the given piece of text content.\n- The keywords are delimited by ENGLISH COMMA.\n- Output keywords ONLY.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 + }, + "label": "Extractor", + "name": "Auto Keywords" + }, + "dragging": false, + "id": "Extractor:SunnyCooksSpend", + "measured": { + "height": 89, + "width": 200 + }, + "position": { + "x": 598.3422026718366, + "y": 414.3467657992519 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "contextNode" + }, + { + "data": { + "form": { + "field_name": "summary", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 
256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": "Text to Summarize:\n{Extractor:SunnyCooksSpend@chunks}", + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 + }, + "label": "Extractor", + "name": "Auto Questions" + }, + "dragging": false, + "id": "Extractor:CleverPianosInvite", + "measured": { + "height": 89, + "width": 200 + }, + "position": { + "x": 594.401162655802, + "y": 536.2317513894384 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "contextNode" + }, + { + "data": { + "form": { + "fields": "text", + "filename_embd_weight": 0.1, + "outputs": {}, + "search_method": [ + "embedding", + "full_text" + ] + }, + "label": "Tokenizer", + "name": "Indexer_0" + }, + "dragging": false, + "id": "Tokenizer:ShyBalloonsSmell", + "measured": { + "height": 113, + "width": 200 + }, + "position": { + "x": 911.3724897632962, + "y": 186.00527380751004 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "tokenizerNode" + } + ] + }, + "history": [], + "messages": [], + "path": [], + "retrieval": [], + "variables": [] }, - "history": [], - "messages": [], - "path": [], - "retrieval": [] - }, "avatar": 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAACXBIWXMAABYlAAAWJQFJUiTwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAABpQSURBVHgBbXoJfFTluf5zzpw5syaZZLKRjZCEBEIgLAFBLYIW0apsUnFprUu1t3q1WqWl2Nvi36X6v7aIqNV760WttVotFVBZROAiEowIYQ1bQvY9mcxk9jlzzn2+E7Cx7eSX32xneb93ed7nfb6R8A+PVWvfm3ek4exih01ZAkjFsiShu7cf40uKcM9ty5DuSUEyqSMS03i0AYfNCsOA+fofH+LceEJDXNNhtylQZRmlbh1/6gHur3Mi3KgAHkCxAZqb1/PzvYXniasN6TCcwPIpftzvTdaHNLneZZEfnz8tvXn0PaSLL9as/Zvnk7qDv7Zb5YfSUlPMzwZ8fqS4nXjgrpsxc1oJurqDpkHCVJtVMc+OxTU+SVBVC3R9ZBESDReL1HQdikWGhf/FqoavYjpu2ZuKzrMKUjOA66/fi+BgLjbvKQdSdUg0XgrJ0LmWmpIgXqwII0WXYbVznW4rBjULWgLS82FH4vGl49KHvl7AvCUPeeJWdXd2tneqsC4cjWB4OIw7br4BixZeytcxRKJxGmTASmNketIw/m6seB1PJGFVaCy/i3KR4lkcN0bVEVcN3LLPir1nHYAziTG5Cn660MDjoWswxTseLetfRDeNT9Ka7JQoXpgSwFyngR4u5C/tKXir2YGOuASFp8u8nmzI9WEd83GnNGQu4OoHXlzrjvQ95EgOYyiiY/Ilc3D/HUu4FhlDgTCN12iMBKfd+rWXv/mQ+D3M45L0usOhIoXvc91JrGxQ8OIBJ6QgF58rIz+X6ZGWQCEXMS/Hj81bHDhySoVaoOGXU/24JU2HapWx/pQdvz3kRtZkA3fNiGKsR0ePy4FlvNFW3lEJRZ5/xO18WLJv8BVrcJ7XArxjMIiaTA1/+q4XhRjCSZ8ECz2sWkV6gLksPCsxCnw/KgIa00XnAVbFAoXflzo0vDkAPPhpKsLDMiR6zghrcOQr8DCvbTQy7Ad6j/KeKQZuqvbjiaI4OkMKitN1fHtzOhodFjywPI4qScNZw4pImhV3PPU31Gyvg89hx2sbfoLTuZ750sy1H21w2K13LJxbDU9RJnY06di0XcFUFtVHy8MwkqwFhk++mNs0VNNGvCQ8H9eSNJqv+V/m1HEiYWD5p3a09togpzMHBhTcXJlATZ4VH7bo2NXBnGa6JMIGqseG8PKEEJS4bKbIVSVcwB/S4Jss4en5UVgGk+gc48Llnx7Hktt/g2S/D4mqCTDOdSAlNIiz9928Tspoe+zwOE/hVCYqdqYvw2c+F1rtUTx9xInOLTL+dmsYc3M1tIe4CAlmwXIdCMXiJvA4bCqyrXxh0/H9Qyp2HHBAUnRY0mTYmW4nFvtQVOBBPCmDdY6T24GZ53X8flYfZtPLp8My0l0WXF4kYewrbrSWyXhkbgzpEQNX5Ngx+81dkO97BeHSPCAQgTKzDIna02bdWQP+ZnlI8k09G2xCn94Mb9dDOIZe9PpUrCECFN+rY+k7TmxsVpDnNEyPJ+hxUbAOVYWXuV7miuO5Lub766nYsceBBTURTBmTgOYz8Na1YYQkG+IGC5uRDEQNVC4EGs61IyvDik5DQVmOFVVZEu7fy6jZZMy+VMMsJYGITcKvB5KQHqbxHhcwFGTYElAWTIGcYodMoNBUe7H8Yfb9KLB60BruY157sCH5Borp1c6EgsecEaT90MA9G5046Seq6LwAkUbk+oQUHV9GdWS/n4r/3JUCKTWJVfcCp664CY7vPQIXDYi9k2QNKYi2EHyGgba6YWy4cRP0tx5DTmEG8jwKEqytYWbay/tUpN5g4AZZx511brz4Y8Lq1A5I6Q6inx3JuBVJxYXg6vdopw0J2KA7PLDsedS7JkDDch1p0JjK3
dIJxNvnIZLuQr7LwLdsCWz3WvHOByp+My+BFKbPkEXHPHr71d1OMOWRyfCXVQCdxMHFhJnI8bk41piFnlWbMGNgEh5bvxq3PrwA7d1NaNryEfKWXIn078xCIhBDPov6V1/ZcchtQaJUwqGfJHDL6q+w4FiUKepDjfskOqMWdIfjCDpt6A0m0D8UhjyvEtq0cbAEV1yyJk9OQ2NvADkJD7xyDiJtDeieXY03vHZIBVZUlmg40WRB0JfEH70qHnzbCR8RQ04H0hgJm1uG0yrBn4jj021lOHogB94ZSVzPIiyak45ZN3rQuG8Q3YeAaMUw7vx4PSw9cdGqkWFJ4L5tLoRvl7BwcRcWbT2HbiLg5wTLNuUI5iQTrD8N5U8th1xTita6VoQdDsTzvLCW5kDyfPRzwykxH7uCSEt1wGF3YOn8ctz46BwceCWKTUU2fLl2LKQZdMjrOlauOYxX/rsKId1GSAWsRCVnKptfREZ4iJ1DMfBQtQ/3ZybRTIiMhqMo92ZDdzONND+amwYxFPegcmwGAsGYCbuTd6ZiWkM3rvttJ7bImxHVh6HAhWtvm457ssKo3XQKfd1DKCr2oul0P/sM6QuRMBqLQrEft2DlNVcyZRJ4/WgtBvQQDv3gHLb+tQ7lSiUq20qx5EYDv24uhDbHgq7ibDzy4+N4/L1q6F0MO+F0gKhiGMTvmgB+VxRjsSpoVW0YbO6Bt0DFF0fPIcTWyazG9o1voLLIiyn//lOkZ6o4OSiZaPad33bjbcvrSEtmwIlUsO8jSb610e+AlcFqjBL9+D/gYFNk3QRZxIXeDEhF33/MsFmt8BY5cOpgHyZPz0Lw/xH/mefWpIsXiiLPUoopzho8Wz8BEx5owdiPP8CpFfPRevUUGGxYxUVhvFoVRB7RppmYnpvGhkXy9qvCKCY5PVjNJjkYjrHhGTh1thHDwTDSvUlseZpXrynHBl82ap5+DD6oGKFyIyRHMzTE+GxnylxT6kSsL8jGGMOgnkRVSQYOikabaVyxJnYuAa4BatgC92YZMWKeypgnmYsJXjZuWCDH+6H4y9BcxoLddwbNegqMRWPxuyl9eLKIeRogJ7JbUEyvOtIlHHghgjMth9HRH0ORJQvFC9jBeZPSwmx4Ur2oGJePk5t0dMlWHPBKyPnkA/rc8002yz9WGoo9Ko62RxCmA149+RBO1XVje/0QnIK5FmWlID/HiZtn1eDfLr8UA4NhhojECXb63ocgS4pNHg1oQkFtN8IZNvi5rJv+fwWOVvZjgqygWVNQUeBAVqqKYIT4HDfIThV4b2nHwgdd6Not6tUwO3g4rsPFBf7urtM49VYBblu3HfrsFMSduZDkf+ZZ4pMgOdbciR4U5KXiscV/Ri9Z8fRxbuTSFjl+mkStXcKZXV04Wd8Fm2I1Twyjj/4ooVfCXMZZpEte1LsPIYdN5Y3+H2BFiR1nhi1sSAoK2JSS7LqC6NlpeJzEbepPZfQ9uwgnD7ehYEM7GaRi8idFFD69/smeZuRW7EV1/A+oWDmMw2+tJHU+byLT6AedjPPDUaSRTlfl2ZGd60ax14bLJzOFWvyQU6fZ4Kl2oMfhR2NLDxuG/nUADTaLbFQjKg/DafSi64dzUbPTh0/PB9nQgLJcG/m+4EPfJHZiDpAJs98/SFq8qBoWLYS/7T0Kj0NBU6cPv9+wA6+fnI87Dl2OprW/QM0Xh1H5bgx733seDr0XVsZ99ELSGeW36zqwaEk5Vqd2476bxqHuBLODjECed1UZ8qalIPNKF/oOhtk55Qs1JJlRUNkXZuhX48Tcy2Fz5eLKQ61YXJMNN9v+cDhhelW+cLPRw06UYS/JzkRH5SmMzy9FiTsTqdIGwm8vfrBiATraEwgO6Ch44DtocJ3H8ne7seRXPtR+/CLOLfs2VKKDHf1cRwK8CSJkwm8f6IO1bheije147VSfuTApU3vEuNVyGU5UnUbfSTJA48LKJUHadLj1NhxfvBShe6/HXdftRt5VY
7BgVRHSqgmLMdIT5rUwXFBsk3aPGi8FSw0Sfc4NdbPGKhFiQ/JHIugNMJWUkftk5jqw9nsH8Mn7TaiUilCt52HjwjQcW5KDaFcL0vbVIauhEakDPvTFA7hh9mTsZjNzWKKIkD1IM431RtVfM/D58mNwy5zdiOdWI2SGsXN8FZrW3I7yWhWLX6wl9FWjgxeyEutXtZShuzlqzgEq8ymRTJp2jx4tTQOzHfj9bUew//QRyGQHcy6dittemIBYIGlGWbBKEcBrx76BchSQqqSC4wMuMUpQ6+lEYOpUtE60wUeAiitJJDkZJsmzxFEKGbJ0HVYbHYRLq+pgi07BUEkheucQ36uqUHxWxtWvNmBMzIZuwuaA0YMMHuctt+LOI2Mw3Jo0511hhMh/kU6JC/OBIH1iIYY9iUidDR8938aRUMbilUWwTYoh5E+aBTox34rjtRrWL2xDZtwN0e6OkxPzTKTQzBKpAL0Gew9yze/6yZbHgwiII3yeBCnnl1EjwanFzYxTWTsZrSFUfOVDdiuRg3KBx12ChqE2/PzDcTj+YQh2zor7/urDI+fGYqAt8U+wJ+pBzM6xWMKcn+NkiFGOlenVRCEW+9CxJGyTE5iSLuMo8eKe92KwPOXD4s4YfBYbmycHd/51op3mGkhnX7bT2yKmCvNC4V8cMbY8FSFmibL4yb0MnVjRUZRiItq4wgA7QEraRGJ6GAeHGvHS0SlYf3szrr03C80HImaOs3xN2eTiaGkaL1CINSGmNjdnhQEySG2QPaGIlDnA/sBDx5TyPH5/w5c27KrnkLR3APptaXCeSWDapmZ8yX4jC5VD0GVzCRYzGuI1k4emjzw0WiDmRHmIB+xj0KJcZx1D18nG1YsOdr0o2uJ+/PvaEjx73XmcqA/RbgkOj0gZySz2KCWVv6OQYaoRuIBCYlkC0VQy1bhFo9/Y/jnkvxxg09vqwd5aO6oK6QDmkdKroXaGG/VpjbAzClHT0ISpaoRptmSKJ9IFbJRGvafTwgwDfQM2aq7Hwj/FDBUbJ8aVOdCwPYTGthApggPj5zux+91BOEU3oreFWCUsDQpP0/MONjFxU7Eokf8p/N5iY1NLZbpoEopqXVj3uZNDvYxXfgH88DYV117lghZiLvUm0VGQj5ARwKKnZ2Lidfno1wPkQpwDpJhp2T8+xGirSJDwrx6m1jNInJ5iw8tPVCKFMshrS9vRTaqweEcuwj1itBxhoS6miyheIXJZhfe5KGISShw6WuwSLvncgTOH6ZhsGSXTOD/k0WHEcC+P6ajI5BjHmdSvY/ZcD258exlpdhKli/Ix9cdlOPzCOZzZ3wl/MMQ6tV2Igrg+0c/CPnA9NhkXFyFWJLKu1xjEPMcVGIqGGBkDk9noPjzUi6t/lo0Vz+TAoPGBoGYqExdFLhMSzTSicEWYczok3H6ESsRBzq9chKpoSC9QwKEKzowkjyPVOCBhiJmZVqnj0axuVNoVtPclUZ5vQ3GWii6fhpCdvYRjXx+7f6A5QqeyWTIY6cVOeCvckObjXUOsShhup/eyyzyoeqIQyivZOLmzWWQisq9zYMWfc0EGjH5OUjamhlDhkqPw3uzAdEBJmoEnTgNP1nNWjDHVqEaIhidIvI1YnpnD+aGfw0gfCSMHoQerfPiRN4oGvwrO/KguspOecHJLGOawYyqYrKVwVDNLN4MKhlPIG5ooa6bQt56dYobdzVV7Sh3IyLIhjfJf58c6jhVacPO2fEwe74CvLQYhRrpcKuJMlRhvoCojApcwfgKnr61UImZudNFTMmxMlxhh58qxUTwzmwoGi/n1Yxqe2MiDOfteM2MYz+SH2ZnJj+J25PN4j1OB0Iy1xIhjBC1TSKHzHVGcDFqwpd2NfZ2Umig4iGwZY+eIdKyT1FEgCQ9WWRE6T0gytweZv1lkfUY3wxhLmjThog5qNi1CYYzHlXHK72ESLieqHKdEaEljGtJbBnP84aoYnq1JwEh1E7Fgpk/b/xr4jCPjJ
G8YXUkVOakKHSYLh5p1JKIqHCPR++XuBN46o+CRfS4MM5UmjU9i0hgdXgpoYL/oZPpJx1oCxr9sRppmGijkc/F+ND0QofNaDAgR+646Ge+fcZAuizpiuognRkgllvU9GEDvgA1jGdWk8Cb5vtQq4fsfb8Uvr/42LGrMLPbR1xb3kpMaXKQNc9/zoJmOu3dxHDO9GvqpXCsJlZRHxnnlPB7ISGHHp5pwMWQXPSzQRKjLLtLf6IXXF3Oeh6OMUvjzbTJW/8WFhJ+5nMlruBj7OMycB8+z+UP4U4UfNx/Ox6dPtcOVA+x4fytecX6K6t9/GwX0YFsS/6Ryq9TWA5TUJ77sgftaA+unh6EFmZIh6kPsUHusn+GAvh9+TojP9boJENm6iUFi5QkiiMBzq3IBz/WRwhbfiw2N8S4dZMEYs9mFlW+loCpfxrXTOWnRcGmY1SZykp6WeUyYw/qfz32G8DoZ3/vNtzD3nkKo5D3xD/14UrsaLbGQyWJHlO2RexucdQ3ee8Z/pcK+Avjt5Ch6KaA7dRXjMo7gV/pqbA9vhT8axFrPXVTqXVA8Fhf6SVOFsaL9C4cYo3ZbNH5QSHoYYAQu+0zBoWa7qVDPuwm4ezoEY0fXm0B9gJ2XvCdJplE0No71i8JoMi6B9CMdO+dvw+ZX92HH9l14+Z3bYItnQFFls5uHo4kRR9lVjKeE7n2eMuL1wC/zoxhmPY7h+CkA44qu9XBxtr4urRLjLBl4L3QM2bZUyL3BYXqZ0MQLCCogeIyZ8zQ+lXlelpYkcaNq/GoKDp9yIsPLIbuGfYD6D/VXxHqBoy30JKVDGwXe564YwI5Jg7D20aD7ktjbcI4ClQPfeeYW7Gx/E2cpMyoWMlECBhVUjpcy8rLTkGON4e49doTzJCybGoeXmyJvDjnx6Os2vLKqj2GPI6xH0WMJ4LNEE/bHz6KHEpCcGFBGOhhG5llhuMa8r6TWudknI+PtFGxg+1c8pAY5TC86SKXuE2K3fJieX7WOZ2YpuGuWD4em92MyC7U1acfYigQO/9yJaYW5rCUdZ042o7ahH++8/gK6mo/CGiIz/UxFSUkaXn3rA+6TqXj7ICncVUBrixUPrpZQPO0MfnHfOUzs2wmpIw/VkQLsOd6GhrZeKM0uxBq5hWVIf0+XBEM5gchykJJ3wRYVPQHCouCGYsuMXTcRIUUmnW5vpcOJCMLrc+aE8dxYdghi8/mEHTkZXDR3Yg48qeLIuz5om3PxH8FUaBMNKhIJfLBlC8mTFfeWNuNSDuk/3fI7Ktyp+MvVN7ILMqKUhrxzmvBMj4H98nnsJa1umd6CiS2pONLUSXt0HPjuo7j5i//BALfBFJMCM8/zGLKYVceCOjv2HaBs6L3AOTJYkUNWEx416jq+RjJQyihjJibwwmV+yioGWmi4h1he6BQbCEQMbkXt23kEamEAp9qA5p1lyLmSewbkLiX5OZDZSGvWbcD/7KpD585z+PPuP2LpLjpyqYSfzehEb28r3pe/ouTfhstnVeHNX6zEmAdXIjs7w9xI+4/tW6B1C4mf7lWIueVMlzVNlEdeS8MXX9oxs5jNjNK5JISyAGUWAZGi0XBbSM2y4Kn5/dhbMQQHi3aQQ09JNpU9t2Jyd4EsQvMsySvGtHUBVJRmITZ0oX8YukkTztS24j/XrYOzK4ocdxzlkypxltPsQvaNjo79qMVmRPQgJ7IMHKfUs9S7DvPHjIdxkvXJ/9NHu9C+w4/gnjjkfWKHhHj+0ud2uFhA/02au/xOjn4kWGK7U3RoKT7CZ26dHkDdrE7MYkdtpkZflM1OShlRNCnNbEaGOQvEWTtX/UHBl1QX7HcdRtEy7o/R8jSStf1H+/GjSw/hBstP0HRiK55b/xKG+ukcyjNFGw/jS8s+uIx0XCSYGvlEZIhOWR3EtPQsyMcJAueJmA0k/sf4+u63U5qt+VJxxVReJCuBADfUCnji2SG6ilOULnB5SgTrCqgYk0G2ay54WRMuu
2ymSyJpjOLnI3tmoiGl6k7cevY8yjOrcYm6ET/7YxlSCmUTScIM5xVZK/DSsXvgirhxXecPIMXeQKO+By49/Rswbi5E9BaPBf0vhzDzllxMKivEno5TcFvVesVRkNzkyVV+IkhUJrvjs7U6Al9Q80lIyGJRra3oxTQCVSOrKyvFwp12i8lbBHbHEyO7loK7CHlFYyMa2Ue2jOhCtgJ8susM6mI3mtKiws9bOwewcg+lwstySBDjGFIG8Xn7u6QEb1B55qRHbDb0b1Ibk534NRa3E9GjGq65uwrbXjrGvTjUc5vVmOcwtN0qJcIhzhUxcjvVbeDn1PhvdUVNpuimipqVajEvlfwH3iI6t6AbKhudoNnJURERFEQcv7/+FL63aDKvP4wz56MoLM9GhPsGpnzL1v1vPXciev9rOLYsgJmrHkdQLoa5rzvqId65OE/k3ZiK3tNUClNYoBZlnBy9U9ozzAzpOUJ2SUK+rGYYX13Ri/lEpTbJhdx0i0mFdWO08SM7lYJWmx3cqTKb5Au/n8DX+SuaYkGREzvuDmKy9Q8oz3sHNksKIThi2id2Z8Ult036K6bU7Yc7twgNt9xBMa2ZbFb+xgLE2O1rj0LmODrru2Ox8EfV67a983DzyJ3WGp4pxbHd60p93B6W0UMmmUua66aOw5T+ht4jSF3swigpNvsukrCLNZC4MNgLddrQRwZ9uTEFDccHMGacE1nc8FDGJhALj6gXIv3yily4e9w2TGhOwTu/uQztjnbMeGQtWSmdKmVAzHsmxxELtmlYEJ5Z/xw2zIf0wdDIpLz98ei2VXe/2685HKpLnV1Itdlikb5OB7FKcSONixBeFosQ84HxL351oFgsI9HgABClgYl+FTEjgcwyG7jjijANj3Hj0MUFWq0jURO/kehv82M3N1iu3TmAzNY01K5dgfayfDhaW5Hq76RLIxQamK5aYl16S8qd9Uuf/fuPPUY/Gs77ijlGriHAVPPrqRc/F+giclrkemykii8M8MY3yk2jxDgi8BI1hknUQkIHtZh7ZyBNFlGh0g7VzlqjRxUzikRAwuVNZX/BGEqLU6XJXHwz9IKpqFuYi/PjYs26v2dT+rFzH7RvW7BntL3/B41Ezp+M4ooqAAAAAElFTkSuQmCC" } diff --git a/agent/templates/chunk_summary.json b/agent/templates/chunk_summary.json index c3c17ade44..06935a21c7 100644 --- a/agent/templates/chunk_summary.json +++ b/agent/templates/chunk_summary.json @@ -12,220 +12,64 @@ }, "canvas_type": "Ingestion Pipeline", "canvas_category": "dataflow_canvas", - "dsl": { - "components": { - "File": { - "obj": { - "component_name": "File", - "params": {} - }, - "downstream": [ - "Parser:HipSignsRhyme" - ], - "upstream": [] - }, - "Parser:HipSignsRhyme": { - "obj": { - "component_name": "Parser", - "params": { - "outputs": { - "html": { - "type": "string", - "value": "" + "dsl": { + "components": { + "Extractor:PublicPlumsKiss": { + "downstream": [ + "Tokenizer:FullBottlesDeny" + ], + "obj": { + "component_name": "Extractor", + "params": { + "field_name": "summary", + "frequencyPenaltyEnabled": 
true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } }, - "json": { - "type": "Array", - "value": [] - }, - "markdown": { - "type": "string", - "value": "" - }, - "text": { - "type": "string", - "value": "" - } - }, - "setups": { - "pdf": { - "output_format": "json", - "suffix": [ - "pdf" - ], - "parse_method": "DeepDOC" - }, - "spreadsheet": { - "output_format": "html", - "suffix": [ - "xls", - "xlsx", - "csv" - ] - }, - "image": { - "output_format": "text", - "suffix": [ - "jpg", - "jpeg", - "png", - "gif" - ], - "parse_method": "ocr" - }, - "email": { - "output_format": "text", - "suffix": [ - "eml", - "msg" - ], - "fields": [ - "from", - "to", - "cc", - "bcc", - "date", - "subject", - "body", - "attachments" - ] - }, - "text&markdown": { - "output_format": "text", - "suffix": [ - "md", - "markdown", - "mdx", - "txt" - ] - }, - "word": { - "output_format": "json", - "suffix": [ - "doc", - "docx" - ] - }, - "slides": { - "output_format": "json", - "suffix": [ - "pptx" - ] - } + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": [ + { + "content": "Text to Summarize:\n{TokenChunker:FancyCitiesStick@chunks}", + "role": "user" + } + ], + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. 
Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 } - } - }, - "downstream": [ - "TokenChunker:LateExpertsFeel" - ], - "upstream": [ - "File" - ] - }, - "TokenChunker:LateExpertsFeel": { - "obj": { - "component_name": "TokenChunker", - "params": { - "chunk_token_size": 512, - "delimiters": [ - "\n" - ], - "outputs": { - "chunks": { - "type": "Array", - "value": [] - } - }, - "overlapped_percent": 0 - } - }, - "downstream": [ - "Extractor:YummyGhostsType" - ], - "upstream": [ - "Parser:HipSignsRhyme" - ] - }, - "Tokenizer:EightRocketsAppear": { - "obj": { - "component_name": "Tokenizer", - "params": { - "fields": "summary", - "filename_embd_weight": 0.1, - "outputs": {}, - "search_method": [ - "embedding", - "full_text" - ] - } - }, - "downstream": [], - "upstream": [ - "Extractor:YummyGhostsType" - ] - }, - "Extractor:YummyGhostsType": { - "obj": { - "component_name": "Extractor", - "params": { - "field_name": "summary", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": { - "chunks": { - "type": "Array", - "value": [] - } - }, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": [ - { - "content": "Text to Summarize:\n\n\n{TokenChunker:LateExpertsFeel@chunks}", - "role": "user" - } - ], - "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. 
Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - } - }, - "downstream": [ - "Tokenizer:EightRocketsAppear" - ], - "upstream": [ - "TokenChunker:LateExpertsFeel" - ] - } - }, - "globals": {}, - "graph": { - "nodes": [ - { - "data": { - "label": "File", - "name": "File" }, - "id": "File", - "measured": { - "height": 48, - "width": 200 - }, - "position": { - "x": 50, - "y": 200 - }, - "sourcePosition": "left", - "targetPosition": "right", - "type": "beginNode" + "upstream": [ + "TokenChunker:FancyCitiesStick" + ] }, - { - "data": { - "form": { + "File": { + "downstream": [ + "Parser:HipSignsRhyme" + ], + "obj": { + "component_name": "File", + "params": {} + }, + "upstream": [] + }, + "Parser:HipSignsRhyme": { + "downstream": [ + "TokenChunker:FancyCitiesStick" + ], + "obj": { + "component_name": "Parser", + "params": { "outputs": { "html": { "type": "string", @@ -244,22 +88,23 @@ "value": "" } }, - "setups": [ - { - "fileFormat": "pdf", + "setups": { + "doc": { "output_format": "json", - "parse_method": "DeepDOC" + "preprocess": "main_content", + "suffix": [ + "doc" + ] }, - { - "fileFormat": "spreadsheet", - "output_format": "html" + "docx": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "docx" + ], + "vlm": {} }, - { - "fileFormat": "image", - "output_format": "text", - "parse_method": "ocr" - }, - { + "email": { "fields": [ "from", "to", @@ -270,226 +115,449 @@ "body", "attachments" ], - "fileFormat": "email", - "output_format": "text" + "output_format": "text", + "preprocess": "main_content", + "suffix": [ + "eml", + "msg" + ] }, - { - "fileFormat": "text&markdown", - "output_format": "text" + "html": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + 
"htm", + "html" + ] }, - { - "fileFormat": "word", - "output_format": "json" + "image": { + "output_format": "text", + "parse_method": "ocr", + "preprocess": "main_content", + "suffix": [ + "jpg", + "jpeg", + "png", + "gif" + ], + "system_prompt": "" }, - { - "fileFormat": "slides", - "output_format": "json" + "markdown": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "md", + "markdown", + "mdx" + ], + "vlm": {} + }, + "pdf": { + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "pdf" + ], + "vlm": {} + }, + "slides": { + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "pptx", + "ppt" + ] + }, + "spreadsheet": { + "output_format": "html", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "xls", + "xlsx", + "csv" + ], + "vlm": {} + }, + "text&code": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "txt", + "py", + "js", + "java", + "c", + "cpp", + "h", + "php", + "go", + "ts", + "sh", + "cs", + "kt", + "sql" + ] } - ] - }, - "label": "Parser", - "name": "Parser" + } + } }, - "dragging": false, - "id": "Parser:HipSignsRhyme", - "measured": { - "height": 412, - "width": 200 - }, - "position": { - "x": 316.99524094206413, - "y": 195.39629819663406 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "parserNode" + "upstream": [ + "File" + ] }, - { - "data": { - "form": { + "TokenChunker:FancyCitiesStick": { + "downstream": [ + "Extractor:PublicPlumsKiss" + ], + "obj": { + "component_name": "TokenChunker", + "params": { + "children_delimiters": [], "chunk_token_size": 512, - "delimiters": [ - { - "value": "\n" - } - ], + "delimiter_mode": "token_size", + "delimiters": [], + "image_context_size": 0, "outputs": { "chunks": { "type": "Array", "value": [] } }, - "overlapped_percent": 0 - }, - "label": "TokenChunker", - "name": "Token Chunker" + 
"overlapped_percent": 0, + "table_context_size": 0 + } }, - "dragging": false, - "id": "TokenChunker:LateExpertsFeel", - "measured": { - "height": 80, - "width": 200 - }, - "position": { - "x": 600.5891036507014, - "y": 197.6804920892271 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "chunkerNode" + "upstream": [ + "Parser:HipSignsRhyme" + ] }, - { - "data": { - "form": { - "fields": "summary", + "Tokenizer:FullBottlesDeny": { + "downstream": [], + "obj": { + "component_name": "Tokenizer", + "params": { + "fields": "summary", "filename_embd_weight": 0.1, "outputs": {}, "search_method": [ "embedding", "full_text" ] - }, - "label": "Tokenizer", - "name": "Indexer" - }, - "dragging": false, - "id": "Tokenizer:EightRocketsAppear", - "measured": { - "height": 120, - "width": 200 - }, - "position": { - "x": 1136.0745258879847, - "y": 202.22674640530906 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "tokenizerNode" - }, - { - "data": { - "form": { - "field_name": "summary", - "frequencyPenaltyEnabled": false, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat@DeepSeek", - "maxTokensEnabled": false, - "max_tokens": 256, - "outputs": { - "chunks": { - "type": "Array", - "value": [] - } - }, - "presencePenaltyEnabled": false, - "presence_penalty": 0.4, - "prompts": "Text to Summarize:\n\n\n{TokenChunker:LateExpertsFeel@chunks}", - "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4.
Conciseness: Focus on the most important ideas, omitting minor details and fluff.", - "temperature": 0.1, - "temperatureEnabled": false, - "topPEnabled": false, - "top_p": 0.3 - }, - "label": "Extractor", - "name": "Transformer" - }, - "dragging": false, - "id": "Extractor:YummyGhostsType", - "measured": { - "height": 84, - "width": 200 - }, - "position": { - "x": 870.1728208672672, - "y": 201.4516837225608 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "contextNode" - }, - { - "id": "Note:MightyPandasWatch", - "type": "noteNode", - "position": { - "x": 1128.1996486833773, - "y": 342.4601052720091 - }, - "data": { - "label": "Note", - "name": "Index summary", - "form": { - "text": "Using summary to build both text and vector indexes." } }, - "sourcePosition": "right", - "targetPosition": "left", - "dragHandle": ".note-drag-handle", - "measured": { - "width": 249, - "height": 128 - }, - "selected": false, - "dragging": false + "upstream": [ + "Extractor:PublicPlumsKiss" + ] } - ], - "edges": [ - { - "data": { - "isHovered": false + }, + "globals": { + "sys.conversation_turns": 0, + "sys.date": "", + "sys.files": [], + "sys.history": [], + "sys.query": "", + "sys.user_id": "" + }, + "graph": { + "edges": [ + { + "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", + "source": "File", + "sourceHandle": "start", + "target": "Parser:HipSignsRhyme", + "targetHandle": "end" }, - "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", - "source": "File", - "sourceHandle": "start", - "target": "Parser:HipSignsRhyme", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "id": "xy-edge__Parser:HipSignsRhymestart-TokenChunker:FancyCitiesStickend", + "source": "Parser:HipSignsRhyme", + "sourceHandle": "start", + "target": "TokenChunker:FancyCitiesStick", + "targetHandle": "end" }, - "id": "xy-edge__Parser:HipSignsRhymestart-TokenChunker:LateExpertsFeelend", - "source": "Parser:HipSignsRhyme", - "sourceHandle": "start",
- "target": "TokenChunker:LateExpertsFeel", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "data": { + "isHovered": false + }, + "id": "xy-edge__TokenChunker:FancyCitiesStickstart-Extractor:PublicPlumsKissend", + "source": "TokenChunker:FancyCitiesStick", + "sourceHandle": "start", + "target": "Extractor:PublicPlumsKiss", + "targetHandle": "end" }, - "id": "xy-edge__TokenChunker:LateExpertsFeelstart-Extractor:YummyGhostsTypeend", - "source": "TokenChunker:LateExpertsFeel", - "sourceHandle": "start", - "target": "Extractor:YummyGhostsType", - "targetHandle": "end" - }, - { - "data": { - "isHovered": false + { + "id": "xy-edge__Extractor:PublicPlumsKissstart-Tokenizer:FullBottlesDenyend", + "source": "Extractor:PublicPlumsKiss", + "sourceHandle": "start", + "target": "Tokenizer:FullBottlesDeny", + "targetHandle": "end" + } + ], + "nodes": [ + { + "data": { + "label": "File", + "name": "File" + }, + "id": "File", + "measured": { + "height": 49, + "width": 200 + }, + "position": { + "x": 50, + "y": 200 + }, + "sourcePosition": "left", + "targetPosition": "right", + "type": "beginNode" }, - "id": "xy-edge__Extractor:YummyGhostsTypestart-Tokenizer:EightRocketsAppearend", - "markerEnd": "logo", - "source": "Extractor:YummyGhostsType", - "sourceHandle": "start", - "style": { - "stroke": "rgba(91, 93, 106, 1)", - "strokeWidth": 1 + { + "data": { + "form": { + "outputs": { + "html": { + "type": "string", + "value": "" + }, + "json": { + "type": "Array", + "value": [] + }, + "markdown": { + "type": "string", + "value": "" + }, + "text": { + "type": "string", + "value": "" + } + }, + "setups": [ + { + "fileFormat": "pdf", + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content" + }, + { + "fileFormat": "spreadsheet", + "output_format": "html", + "parse_method": "DeepDOC", + "preprocess": "main_content" + }, + { + "fileFormat": "image", + "output_format": "text", + "parse_method": "ocr", + "preprocess": "main_content", +
"system_prompt": "" + }, + { + "fields": [ + "from", + "to", + "cc", + "bcc", + "date", + "subject", + "body", + "attachments" + ], + "fileFormat": "email", + "output_format": "text", + "preprocess": "main_content" + }, + { + "fileFormat": "markdown", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "text&code", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "html", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "doc", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "docx", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "slides", + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content" + } + ] + }, + "label": "Parser", + "name": "Parser_0" + }, + "dragging": false, + "id": "Parser:HipSignsRhyme", + "measured": { + "height": 197, + "width": 200 + }, + "position": { + "x": 316.99524094206413, + "y": 195.39629819663406 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "parserNode" }, - "target": "Tokenizer:EightRocketsAppear", - "targetHandle": "end", - "type": "buttonEdge", - "zIndex": 1001 - } - ] + { + "data": { + "form": { + "children_delimiters": [], + "chunk_token_size": 512, + "delimiter_mode": "token_size", + "delimiters": [ + { + "value": "\n" + } + ], + "image_table_context_window": 0, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "overlapped_percent": 0 + }, + "label": "TokenChunker", + "name": "Token Chunker_0" + }, + "id": "TokenChunker:FancyCitiesStick", + "measured": { + "height": 73, + "width": 200 + }, + "position": { + "x": 616.9952409420641, + "y": 195.39629819663406 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "chunkerNode" + }, + { + "data": { + "form": { + "field_name": "summary", + "frequencyPenaltyEnabled": true, +
"frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": "Text to Summarize:\n{TokenChunker:FancyCitiesStick@chunks}", + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 + }, + "label": "Extractor", + "name": "Summarizer" + }, + "id": "Extractor:PublicPlumsKiss", + "measured": { + "height": 89, + "width": 200 + }, + "position": { + "x": 916.9952409420641, + "y": 195.39629819663406 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "contextNode" + }, + { + "data": { + "form": { + "fields": "summary", + "filename_embd_weight": 0.1, + "outputs": {}, + "search_method": [ + "embedding", + "full_text" + ] + }, + "label": "Tokenizer", + "name": "Indexer_0" + }, + "id": "Tokenizer:FullBottlesDeny", + "measured": { + "height": 113, + "width": 200 + }, + "position": { + "x": 1216.9952409420641, + "y": 195.39629819663406 + }, + "sourcePosition": "right", + "targetPosition": "left", + "type": "tokenizerNode" + }, + { + "data": { + "form": { + "text": "Using summary to build both text and vector indexes."
+ }, + "label": "Note", + "name": "Index Summary" + }, + "dragHandle": ".note-drag-handle", + "dragging": false, + "id": "Note:ElevenKingsPick", + "measured": { + "height": 127, + "width": 267 + }, + "position": { + "x": 735.9586746349814, + "y": 315.614230763182 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "noteNode" + } + ] + }, + "history": [], + "messages": [], + "path": [], + "retrieval": [], + "variables": [] }, - "history": [], - "messages": [], - "path": [], - "retrieval": [] - }, "avatar": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAACXBIWXMAABYlAAAWJQFJUiTwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA7ESURBVHgBvVpLrF1lFV7/v/c+j3t729tSClZCigXFF/JQE21CaKITHcAEica5UwcmDsW5A4mJTIwzB5iQiMTIwAhGQzQSrKJIomADWkpLuX3cx3ns/f9+31rrP4/b2xqNsJtzzzn7/I/1r/Wtbz12g+y6Nq7kB1KQB0OSh3KQY/IeXV0QyXn5Xox4JZEQ5BR+OhUr+fbBYTi9OCaUDxs5r8umfAvjvy7v8pX9lfRvuP5g/Fznpe/fjVMc5GC4KGU2hU9b8izWu1vepUsFVi1nCD4XOha5YPLFw+S88Bkfe9nWcKFPhVZO8hA2H5p/t4RvQ5YJ3qfYfdpl6Vz9lQsfY1aoVPhWQdJQXrgfgonMt2mYmwGf7k4NZOZvGzv5WGrlH/J/uqjlhB06mWOaugSOZ9qm0LxbJ8LILvqAyohBwXCv62Q4QU42M0L0uGAlDDtZp85O8r9cXIp7Usu0c2vAVqEqOuCS0PauQuOUHDNeBEV2APl8npFTGgg/4RedE0wZZU2Rh+r/FjozocWwnJLdzS4kNVfPhM66ZcX9k1mHcGqp2jAXRaG0hy9z6YkeIsjU77X8PhfmwXDhym7yuvbVFngk8yjOrKqwDA+auUiTjGlaP2hKjulg88QhwbkhOjtxUVi0wEatgVfNw7uV6B88FMfX1xO44Jnazp1tqJMqM3mRMwY6XFDOznhvk2mc83QRh0oVDehVsIl1DDNqad25GzEnnkB0Qo2bFAYjE9EiAV86P3x9TaFJd50oBExos9lM08BF5OBg9yn0GBNVyXl2MmcZ+175IdxlwEpirOTxgHPH2SxF8fqYMB4lxeYA0jcucKuWCdIrFpgJzYmFOajRmSMGv5cxwTWSDZedUmPRpMPJfUEVTh/wzx1kmXTcJ8FJo2qRXpI786cpBnBfKivAnBvjIOcuiawNsxwAfvfvMxKgEinrlMp4Ez7Qdtmpzk0splmFqdj37EFzUhwyzSNqED9oMCwbv5vGqWGu35JFqCAeBHDsctZgRX+qYKVhL8qwMcEmrej4BlZeGwR55WySmw9EaXF/fdXSCyqP64Uzl7N6ZMGllM2doAn9tgjtjut0rRYhPConlMoRxbHTZBrvDFMQymiQkyk89xjA/isQsAWwt6dZrVpR+zkBMkEPOMXmB/cF+dvZVo6s1Wq1tZ6xhir+LVhAFU8BHD6dzCGVHB5pJrQf0IWecb36gEVbHjLhz1QZK+piWcwCBODKIOohtzCAAtKzAjRGeDYYvn8l43BRxnCsGPieZf8wyN/fyrK+EmTYFxlWJqPSaHQtU/DWBciO9Rk8q
mWoRJMZZnXtQmsJuMZffI8KxZD8EHj1ENkq3NuBSSdtmlmRizbQCK3Zg1aGyNzWIewTL07lCx9vIHSS22+MOjbjkJvbQf3q0JpRaz0N5ojZo2NwKFWqbjhtMFyQ3wMpjfQILE5oHTofnZH01kXzF2iMTjrGepzbRIvQV8aGfVULxkQIUymMIiABrQNKPz7Vyi0Hgvz0pSSPP9LI9389kc1JlHtviXJxBMvhoJex9irGXt6GVWCNOHWHyTErLGoKDS1gXekVGqSFIPgOKG0TPDfiAfDaxOm326xQ6/C+Bc+6MqElgzrwCL9f3AFUxjZeU2PgftAkObwSsb7IX8508qMXkzz6zES+fF+Uc5tBHr6vkm8+PZGvneiZM6s8QXbAIO87FOXtzaQ+SxoO/7oECMU5vosvkFrbzpyxhTdmwiOb8MRe7TzO74wZg8ZwrTRJ1oG26T4NhcaC+oJW1gdZXt/I8uQfsjxyT5ZfvRp1j2+cFPnOLzusGeTkHUHuOhrV/146k+SeW4OMp2J0jc37sNiFTdDrwJ24MshajpONPaaOU0bD7Za0lzRokBm2W7NI7XzPg6gfuNNTIfztMCjvZVDg8SMRB8ny8psQ/I+dfOneCrSZ5Zk/Z/nM8SB3Hgny+9eT3H+8lgNDUQ1fHInccRj+0mlKKk2TFYK09hBauQQk7Ou5EzPkt0p55OVkQQpCjTtSpGl/GxQz4UIVgw1wPqUf2EGUofBnMAAt4pSn32nl+OFavvdcJ1/5dCUHEYieBExuWs9y4rZKD0EK/eJHKnl7K8udNxnUpmQprH3DEEFsC76B9wGEpUWraIqhr049dqyCTsMbF3NOHr5png4LVDgQnW6nNUgQf8UPxq79XmW/E8cNPlMbv3illRMfrORpCPj5D1WyD3T3879iLfjO7UdQ+W1GOXulk4fvqbBmVM4/OEhyaWRr0KEHdVKokhTqaL5I4Sk0uWjcJSWKXmMyRI2oyQISMbqGTQErOKOnD/i3A23TpJt4TaeEW5bHf9NCU9A4TYv5F7aYo1Tywmut3H97lB/8tlPtnTgGwXeyvH9/LZ+7MwD3lfpQhMnWeknxf8MKIiys11fhbde+sxSVtQViGIGNyHyHVk14yaZcdWLNF8k+oKERoPLaBct+Jh5NYwkIGHMzaO4izP4aHPED6wFMZEU3zUtn/uHvRD55q6gVD8IHju4PeIE5trMqatgPEDwr1GgB8nIuuVYMnucUMoBFIOwQSqIfMq4wcvdqMpwF3VppUgwipLoLW3AQaGzQD1ra0bRqKqw8BPU8+rOJfPVTNWjQNEPqHSUmeVExedfRTj57rDIYtBauzm1mzfNvQDI2AButNpaY55IAegozcXqO0QIX/Y1WzErLIIhoySJnU9kMospCvLU98fQWwrz6TlDWWEVE7DrDPVOEd7aSvHIOGoQAq4Dah4HrTXDzPqQGhNwAsIg134N2GTYn5hs0eZ9FCBSxqgLNk0diewxNEwfRqznmpoR0v7YYwFgwmVoKvor1eI/s2JZoTmpidOWiByD0QfAr04pLgMpl4h4njZhNS3wUjPH86SQfOwptwoOfeDHIC/9kbeAlJg46gWM1cDQ0oaSPoNWIpQn7Gk31VPxRSyrke+UpylSjfQU/GEIh+wCVqjZmZPQnjRrsklVtXguoBUYTOhPZB45TWyHx/GnRJGp9aFYZtRY5iatDQ4T2cVJTHx6SsbI6OrVNndRaxFgE56H7zDobsySxramEBp7O0xbs2US1UMhWsDBDoJxkoaaxd0tWli87ALxmMuUilqjxhOe3wefnLUkb9ixl6HtlRV9IweJBla0HUkdbmokZWyHcsIZCmrrS36ggK947nUu/6yGvYDwoiSHxT1ailAo7T2lK8a81BKuyMK9F+Fk5OcXCBpaFruOX1T5xbKXeIEaLgtgwQaDR2MpKmrUO3udhskc/8Hq5Bga0WM8UxKqgCG5cqRvVNq9xZ8lhciZb7RncyIrZy0zN6MmGw
QorjQmV5VvhzJWUmTWOJ97PsXRDg08PePzT2VqmgBgZJCEyM73vBbeA5u8mfPQqnywxxMGowdJXUCjBEk1txQ+zA8KzbS0BIy0OqG2vu5OIt2vs8FY2m2IZGzguutK0IgteZe8A1wxmXOfyyALUDjR05lJE8gTsr4imsBq6xTJVa51YK3CtD+1WSe+TRusqanSNtWGbTDL29gq1TMg2XvSr0F4IKaSy5UC+lflCtFqZjFV6p15SWnlIjTC7JMan2WBCLW/AGm9dtiKc3EvaLO3BFZicGSb5nRilX/Qb1rPWfbDE0DoWTAnUqRurLcjtTL1zMkuVJpl17uxgjacq4jnZ7i5WeGPD0ungNe32xIJOD0dmXLCSLyg1nt+MyiRUEdMIpgIkT0KiD4ekcApBKZHUDqB5TmN1MAOQp10e5oMmaFrGeuOrroOTQNY0IKUou6/sVgpvMJVIppHoPLU1TnoAb/nA9Eyu6EmtLsy4q/UtoNFnzq+sJNpHmpQEkGZvsrHMQt+wYDvxA6yl1SBhUxmZKJWWdku+Sm47fLBUhaShNMpNK2cF1q1TBJkdUKvi14v3WZhXdgE9At8hF/orbROvuGp7N3osjQHzC7p7l60c5Td17Eo08nMf7cZdJTTGBzMt02iSju6t2aqnrKygqsoaVnSuDhuxqGFgI9zryoOJrh8N297vYUrATLLBAGK2OGSbC6azYpQsxnqD1Nproq9nvSIVPO+CSJo/xWnckSmAte/5YzQLaCLXWYJFrq1cCaOJZaRNLF0E6zp0zF10QUuqepW3Bl1wyd7ZyFb0U4NkLiqjV3vwCdZI2I2S4guWG9EXSztx7/G1ndYgwbqTuFYqIz8jNYidWad04gLpr2f4Y2HdLbXZDc9aP8/KSz9kP+jx1RKq7WWotLlQpzk6g6MGrmQN3flTgeVrlo3ytJ22+mTeTpH584Au2XMsNpo0Z59p23GarPXVOpOQMpva1o3XwrZr2xBmlKkw0edoMiturnUZU5XFskdJb8gWK+SFwdl0aB3l4Bsl8a5bVGyTAntaY2T1YuX0XXZvHV6aHmDTQaFfZsX5aussabz0YLOlF8vtdYb8yHTYqKfNngGGmbx6aeuwk9nTRgrL4EUGC154e2zaU9sxWN6kTJVZhwdL4iRcnW7u0jgX0v2jDa8h3Gnse0w3ScbZtQcjzd7zvOViEHUiFndiL/ipDuaYeUHogjCmJNnX7lU2j1rpsiy05pevsr+25sXK2tYF96Yh752KiDVPLU7M2R5vUpuWf2Q1m2nfn3kh8Rr0LM+3p4lBhV+8Zm31TtTZ2W1mJquMkgqMrhbas2lVYs8F1w65a1wfkiQpD0lOhfNX8gNoKz6b93AyfUiXwsyRzdt9s3S11mba9uc6zBybELz+Ddd9Mk/6pHC1zIkj+XMz1/bV8rVyW7xxLTxXpfBYWTcsPFBOnqN0IdtiIc8ebuzWXOnoaS2rPZ6gwrcOE5uyNxPxGrjw3Id9Yr4rhNLewuPWY3hSf1pX3NjI67mRZ6f+yLUcIl+HDWxzOxApsCpPZoLxf8rlicJe8+ZPcbxKVaFdMBU4XMeZcbJTcbrwXw34ATX1SYj72NK4kJcssiS4czSDDjVeOzW2M9qM19rcfMsFTQvCc6uC72td1HwR3pdbvt7cyceqLj2Knz6Br2qRRUvMtBesn6S4l3xd7i6CK6MkrVPcYa3bEfN/mJrlNPTyFIb9hJBf/O3fQ3B6D7564aoAAAAASUVORK5CYII=" } diff --git a/agent/templates/title_chunker.json b/agent/templates/title_chunker.json index 42f3c1a581..6c81ff0615 100644 --- a/agent/templates/title_chunker.json +++ b/agent/templates/title_chunker.json @@ -12,151 +12,64 @@ }, 
"canvas_type": "Ingestion Pipeline", "canvas_category": "dataflow_canvas", - "dsl": { - "components": { - "File": { - "obj": { - "component_name": "File", - "params": {} - }, - "downstream": [ - "Parser:HipSignsRhyme" - ], - "upstream": [] - }, - "Parser:HipSignsRhyme": { - "obj": { - "component_name": "Parser", - "params": { - "outputs": { - "html": { - "type": "string", - "value": "" + "dsl": { + "components": { + "Extractor:PublicPlumsKiss": { + "downstream": [ + "Tokenizer:FullBottlesDeny" + ], + "obj": { + "component_name": "Extractor", + "params": { + "field_name": "summary", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } }, - "json": { - "type": "Array", - "value": [] - }, - "markdown": { - "type": "string", - "value": "" - }, - "text": { - "type": "string", - "value": "" - } - }, - "setups": { - "pdf": { - "output_format": "json", - "suffix": [ - "pdf" - ], - "parse_method": "DeepDOC" - }, - "text&markdown": { - "output_format": "text", - "suffix": [ - "md", - "markdown", - "mdx", - "txt" - ] - }, - "word": { - "output_format": "json", - "suffix": [ - "doc", - "docx" - ] - } + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": [ + { + "content": "Text to Summarize:\n[Insert text here]", + "role": "user" + } + ], + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. 
Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 } - } - }, - "downstream": [ - "TitleChunker:BusyPoetsSearch" - ], - "upstream": [ - "File" - ] - }, - "Tokenizer:NeatRadiosEnd": { - "obj": { - "component_name": "Tokenizer", - "params": { - "fields": "text", - "filename_embd_weight": 0.1, - "outputs": {}, - "search_method": [ - "embedding", - "full_text" - ] - } - }, - "downstream": [], - "upstream": [ - "TitleChunker:BusyPoetsSearch" - ] - }, - "TitleChunker:BusyPoetsSearch": { - "obj": { - "component_name": "TitleChunker", - "params": { - "hierarchy": 3, - "levels": [ - [ - "^#[^#]" - ], - [ - "^##[^#]" - ], - [ - "^###[^#]" - ], - [ - "^####[^#]" - ] - ], - "outputs": { - "chunks": { - "type": "Array", - "value": [] - } - } - } - }, - "downstream": [ - "Tokenizer:NeatRadiosEnd" - ], - "upstream": [ - "Parser:HipSignsRhyme" - ] - } - }, - "globals": {}, - "graph": { - "nodes": [ - { - "data": { - "label": "File", - "name": "File" }, - "id": "File", - "measured": { - "height": 48, - "width": 200 - }, - "position": { - "x": 50, - "y": 200 - }, - "sourcePosition": "left", - "targetPosition": "right", - "type": "beginNode" + "upstream": [ + "TokenChunker:FancyCitiesStick" + ] }, - { - "data": { - "form": { + "File": { + "downstream": [ + "Parser:HipSignsRhyme" + ], + "obj": { + "component_name": "File", + "params": {} + }, + "upstream": [] + }, + "Parser:HipSignsRhyme": { + "downstream": [ + "TokenChunker:FancyCitiesStick" + ], + "obj": { + "component_name": "Parser", + "params": { "outputs": { "html": { "type": "string", @@ -175,43 +88,157 @@ "value": "" } }, - "setups": [ - { - "fileFormat": "pdf", + "setups": { + "doc": { "output_format": "json", - "parse_method": "DeepDOC" + "preprocess": "main_content", + "suffix": [ + "doc" + ] }, - { - "fileFormat": "text&markdown", - "output_format": "text" + "docx": { + 
"output_format": "json", + "preprocess": "main_content", + "suffix": [ + "docx" + ], + "vlm": {} }, - { - "fileFormat": "word", - "output_format": "json" + "email": { + "fields": [ + "from", + "to", + "cc", + "bcc", + "date", + "subject", + "body", + "attachments" + ], + "output_format": "text", + "preprocess": "main_content", + "suffix": [ + "eml", + "msg" + ] + }, + "html": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "htm", + "html" + ] + }, + "image": { + "output_format": "text", + "parse_method": "ocr", + "preprocess": "main_content", + "suffix": [ + "jpg", + "jpeg", + "png", + "gif" + ], + "system_prompt": "" + }, + "markdown": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "md", + "markdown", + "mdx" + ], + "vlm": {} + }, + "pdf": { + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "pdf" + ], + "vlm": {} + }, + "slides": { + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "pptx", + "ppt" + ] + }, + "spreadsheet": { + "output_format": "html", + "parse_method": "DeepDOC", + "preprocess": "main_content", + "suffix": [ + "xls", + "xlsx", + "csv" + ], + "vlm": {} + }, + "text&code": { + "output_format": "json", + "preprocess": "main_content", + "suffix": [ + "txt", + "py", + "js", + "java", + "c", + "cpp", + "h", + "php", + "go", + "ts", + "sh", + "cs", + "kt", + "sql" + ] } - ] - }, - "label": "Parser", - "name": "Parser" + } + } }, - "dragging": false, - "id": "Parser:HipSignsRhyme", - "measured": { - "height": 204, - "width": 200 - }, - "position": { - "x": 316.99524094206413, - "y": 195.39629819663406 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "parserNode" + "upstream": [ + "File" + ] }, - { - "data": { - "form": { + "TokenChunker:FancyCitiesStick": { + "downstream": [ + "Extractor:PublicPlumsKiss" + ], + "obj": { + "component_name": 
"TokenChunker", + "params": { + "children_delimiters": [], + "chunk_token_size": 512, + "delimiter_mode": "token_size", + "delimiters": [], + "image_context_size": 0, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "overlapped_percent": 0, + "table_context_size": 0 + } + }, + "upstream": [ + "Parser:HipSignsRhyme" + ] + }, + "Tokenizer:FullBottlesDeny": { + "downstream": [], + "obj": { + "component_name": "Tokenizer", + "params": { "fields": "text", "filename_embd_weight": 0.1, "outputs": {}, @@ -219,153 +246,315 @@ "embedding", "full_text" ] - }, - "label": "Tokenizer", - "name": "Indexer" + } }, - "dragging": false, - "id": "Tokenizer:NeatRadiosEnd", - "measured": { - "height": 120, - "width": 200 - }, - "position": { - "x": 855.3572909622682, - "y": 199.08562542263914 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "tokenizerNode" - }, - { - "data": { - "form": { - "hierarchy": "3", - "levels": [ - { - "expressions": [ - { - "expression": "^#[^#]" - } - ] - }, - { - "expressions": [ - { - "expression": "^##[^#]" - } - ] - }, - { - "expressions": [ - { - "expression": "^###[^#]" - } - ] - }, - { - "expressions": [ - { - "expression": "^####[^#]" - } - ] - } - ], - "outputs": { - "chunks": { - "type": "Array", - "value": [] - } - } - }, - "label": "TitleChunker", - "name": "Title Chunker" - }, - "dragging": false, - "id": "TitleChunker:BusyPoetsSearch", - "measured": { - "height": 80, - "width": 200 - }, - "position": { - "x": 587.0312356829183, - "y": 197.9169308584236 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "chunkerNode" - }, - { - "data": { - "form": { - "text": "It is ideal for documents with well-defined headings, such as product manuals, legal contracts, research reports, and academic papers." 
- }, - "label": "Note", - "name": "Chunk by Title" - }, - "dragHandle": ".note-drag-handle", - "dragging": false, - "height": 159, - "id": "Note:KhakiBerriesPick", - "measured": { - "height": 159, - "width": 323 - }, - "position": { - "x": 623.9675370532708, - "y": 369.74281927307146 - }, - "resizing": false, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "noteNode", - "width": 323 + "upstream": [ + "Extractor:PublicPlumsKiss" + ] } - ], - "edges": [ - { - "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", - "source": "File", - "sourceHandle": "start", - "target": "Parser:HipSignsRhyme", - "targetHandle": "end" - }, - { - "id": "xy-edge__Parser:HipSignsRhymestart-TitleChunker:BusyPoetsSearchend", - "source": "Parser:HipSignsRhyme", - "sourceHandle": "start", - "target": "TitleChunker:BusyPoetsSearch", - "targetHandle": "end", - "data": { - "isHovered": false + }, + "globals": { + "sys.conversation_turns": 0, + "sys.date": "", + "sys.files": [], + "sys.history": [], + "sys.query": "", + "sys.user_id": "" + }, + "graph": { + "edges": [ + { + "id": "xy-edge__Filestart-Parser:HipSignsRhymeend", + "source": "File", + "sourceHandle": "start", + "target": "Parser:HipSignsRhyme", + "targetHandle": "end" + }, + { + "id": "xy-edge__Parser:HipSignsRhymestart-TokenChunker:FancyCitiesStickend", + "source": "Parser:HipSignsRhyme", + "sourceHandle": "start", + "target": "TokenChunker:FancyCitiesStick", + "targetHandle": "end" + }, + { + "data": { + "isHovered": false + }, + "id": "xy-edge__TokenChunker:FancyCitiesStickstart-Extractor:PublicPlumsKissend", + "source": "TokenChunker:FancyCitiesStick", + "sourceHandle": "start", + "target": "Extractor:PublicPlumsKiss", + "targetHandle": "end" + }, + { + "id": "xy-edge__Extractor:PublicPlumsKissstart-Tokenizer:FullBottlesDenyend", + "source": "Extractor:PublicPlumsKiss", + "sourceHandle": "start", + "target": "Tokenizer:FullBottlesDeny", + "targetHandle": "end" } - }, - { - "data": { - 
"isHovered": false + ], + "nodes": [ + { + "data": { + "label": "File", + "name": "File" + }, + "id": "File", + "measured": { + "height": 49, + "width": 200 + }, + "position": { + "x": 50, + "y": 200 + }, + "sourcePosition": "left", + "targetPosition": "right", + "type": "beginNode" }, - "id": "xy-edge__TitleChunker:BusyPoetsSearchstart-Tokenizer:NeatRadiosEndend", - "markerEnd": "logo", - "source": "TitleChunker:BusyPoetsSearch", - "sourceHandle": "start", - "style": { - "stroke": "rgba(91, 93, 106, 1)", - "strokeWidth": 1 + { + "data": { + "form": { + "outputs": { + "html": { + "type": "string", + "value": "" + }, + "json": { + "type": "Array", + "value": [] + }, + "markdown": { + "type": "string", + "value": "" + }, + "text": { + "type": "string", + "value": "" + } + }, + "setups": [ + { + "fileFormat": "pdf", + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content" + }, + { + "fileFormat": "spreadsheet", + "output_format": "html", + "parse_method": "DeepDOC", + "preprocess": "main_content" + }, + { + "fileFormat": "image", + "output_format": "text", + "parse_method": "ocr", + "preprocess": "main_content", + "system_prompt": "" + }, + { + "fields": [ + "from", + "to", + "cc", + "bcc", + "date", + "subject", + "body", + "attachments" + ], + "fileFormat": "email", + "output_format": "text", + "preprocess": "main_content" + }, + { + "fileFormat": "markdown", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "text&code", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "html", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "doc", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "docx", + "output_format": "json", + "preprocess": "main_content" + }, + { + "fileFormat": "slides", + "output_format": "json", + "parse_method": "DeepDOC", + "preprocess": "main_content" + } + ] + }, + "label": "Parser", + 
"name": "Parser_0" + }, + "dragging": false, + "id": "Parser:HipSignsRhyme", + "measured": { + "height": 57, + "width": 200 + }, + "position": { + "x": 316.99524094206413, + "y": 195.39629819663406 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "parserNode" }, - "target": "Tokenizer:NeatRadiosEnd", - "targetHandle": "end", - "type": "buttonEdge", - "zIndex": 1001 - } - ] + { + "data": { + "form": { + "chunk_token_size": 512, + "delimiter_mode": "token_size", + "delimiters": [ + { + "value": "\n" + } + ], + "image_table_context_window": 0, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "overlapped_percent": 0 + }, + "label": "TokenChunker", + "name": "Token Chunker_0" + }, + "id": "TokenChunker:FancyCitiesStick", + "measured": { + "height": 73, + "width": 200 + }, + "position": { + "x": 616.9952409420641, + "y": 195.39629819663406 + }, + "sourcePosition": "right", + "targetPosition": "left", + "type": "chunkerNode" + }, + { + "data": { + "form": { + "field_name": "summary", + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "Qwen/Qwen3-8B@SILICONFLOW", + "maxTokensEnabled": false, + "max_tokens": 256, + "outputs": { + "chunks": { + "type": "Array", + "value": [] + } + }, + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompts": "Text to Summarize:\n[Insert text here]", + "sys_prompt": "Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.\n\nKey Instructions:\n1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.\n2. Language: Write the summary in the same language as the source text.\n3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.\n4. 
Conciseness: Focus on the most important ideas, omitting minor details and fluff.", + "temperature": 0.1, + "temperatureEnabled": true, + "tenant_llm_id": 54, + "topPEnabled": true, + "top_p": 0.3 + }, + "label": "Extractor", + "name": "Summarizer" + }, + "id": "Extractor:PublicPlumsKiss", + "measured": { + "height": 89, + "width": 200 + }, + "position": { + "x": 916.9952409420641, + "y": 195.39629819663406 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "contextNode" + }, + { + "data": { + "form": { + "fields": "text", + "filename_embd_weight": 0.1, + "outputs": {}, + "search_method": [ + "embedding", + "full_text" + ] + }, + "label": "Tokenizer", + "name": "Indexer_0" + }, + "id": "Tokenizer:FullBottlesDeny", + "measured": { + "height": 113, + "width": 200 + }, + "position": { + "x": 1216.9952409420641, + "y": 195.39629819663406 + }, + "sourcePosition": "right", + "targetPosition": "left", + "type": "tokenizerNode" + }, + { + "data": { + "form": { + "text": "Using summary to build both text and vector indexes." 
+ }, + "label": "Note", + "name": "Index Summary" + }, + "dragHandle": ".note-drag-handle", + "dragging": false, + "id": "Note:ElevenKingsPick", + "measured": { + "height": 127, + "width": 267 + }, + "position": { + "x": 735.9586746349814, + "y": 315.614230763182 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "noteNode" + } + ] + }, + "history": [], + "path": [], + "retrieval": [], + "variables": [] }, - "history": [], - "messages": [], - "path": [], - "retrieval": [] - }, "avatar": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAACXBIWXMAABYlAAAWJQFJUiTwAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAABMaSURBVHgBbVprjF3Vdf72Puc+5s7Lnhm/x/ZgCHWAFLdVKEqogJY2ikIJEUSBhiZFIaVRKZj+KFHa1G7apBS1KlGpkh99EEUgykMiKlFpFCUIyS1pSTFCQLADHmNjjz3jec99nXP27rfW3ufOtZOxru/cc8/Zez2+9a3HHoMLfv7skjuuy/Li4wbmZn6c8t7rdWMMnHOw1kKuyWf56f9dfuSe8rO8y/fyjFyXn/7fy+/llSRJeNbyWW5pZQ2+jOzvcbiW1A67wv3FwelHp/vl7e18cOr3NrTQOcCb9/cLVQpcblIUxfrDZl0B2UffRSlVQp5z5ylXGqEnbFSg/935gsIn4bmoPC8i4T+qw//xMJ8URRZ7CqjwvvVDY5J9pcVE0J4l5Tbjoxg+COc8wtdiUW6a2AuEkZco7HrrqTAXeKp8uai0tWKsILzjq5Kk+rvhtcRENbw/nAPXixK6Yst1DvChfaUAsplYqTSzRxGFFeFsUEldbPW65b1BIYuAOLm5vGZU6XK9fkiWgpfXre69rlgqcKMXfbhB1ynkszH7qkgP6DpfnLptiiIc+xlXRryLRZwzUSgfrBE3UGGCsYKgIkwvNoJFXQ+O8nLhnd7xbh3Fcm+aprqn/B7ihEpH+NJMVM6qBxROGhv8nNrr6R97oMR4vwLBwtS4KGXyKoKJiyo+uai8yz9ZmDsHTIoBihA/Vr9yEXrBa/3wRLAvCnlAl0j5lqvysk4JOlnDYj3WqCJcbm5Oucw+9LmzDFoIbHxF9lPLCkxUEB/CIeDZB6bgPy6G7rJHa1UepZUq0BvTmkc3p7I0XrfNbQuiV7DM640hoFI3PYsHAnJBaBNgozKlia7psG7kQgnGfTylCL3ADewQAtCaCq3iVFi57n0ZxxSKgeW4YLaWYHm+wMQu4PIbEuy5po6pq6sY2lxFbUiYhJtXJOC76sW800a3VaC1lGH+WAevfx+YfsngnVcdOh1gYrMoI1okEQnB43mWhz0FlqqZwInXvZ9KSyT6qK08ldBcOYOpYgOG1SCBj8kKFcyeLDC2HfitL6b41Ts3YnCsyuclaMUTEqxpQIawTyYWr/P5nJasoz7kUB8BxnYWeN+vZ7ylzRubOPGywXNfK/Dysx5jVKQ2ZHpxkiZJdEjwgigpwSxBzyD+Hd+fUKzC2vQeEOsrZBKDueMeOy73+My/jGPylwaVZfIOnxNLwcYArfHBNCakAM3CdSKfFxSgUHgqMn0WmAti2Sbh1ETW9njij7p44Z8Mtl7MjQuD3NEDgkFCz0aCSAQRjBfzJ7tu8/38LIuLNW0P38TumlGP7P+Pcey+aphY
LoKwIqCuWGbeCoUSxqgqjeqLMIKwmI15hEIrNjTKhXWohFKjC5jPm6gNLmPpdII//UAbI42UMPS9xBYBQcNW4POuGGZd+BC8gS4Fe0nF4MxJh6tur+Dvz+3C9l+s00KF4tBqLhAz2kiGRtlIc7FxCGGWR5bxwUO6vHjIqjVBkoCv8zKvoaoGSNJBenUbGmMpHjlXR23MIWtCvR1ShlcP5HmHy9DQJfbLNG5iGqd3cOJoji+9MIpbH96C9mre5yWjTlcLK3UmgszoCeFNocGcG7XVo/QhBchDHhBgm1TjAvSUsXUuMUAv1PhkVVJeVHgcnZUBHHi1hsYWmqMLJQUJYMn6gXByiYP1pJQkAcc2yXH6bY+HT2zC7g+O0OqZJI31hCWL9FQxIT0pZYQrmhlc4H7hdNqL75n+rt4RZeQVadkrxGp8Mg0wdMryvDSK9nJQIpfMzF0phvyvDAlFZn+1KfydOJx6exUPvTNAOqxrginpUwJTaBa+QAB/zF8avEmkuBjLNigCTUBiUXmGQUxKVY+YbogPUUrjqIjJi+ukkn8SVdRWGXOrA/jq0RSnT9gASGI/JUTSpBo8UP6kaQ0nj5/CAz8YwshWWi4vgj1NSaX0DuPTVEJG1LRvArVJUJbcDR/hHmsmX8a5QFUh5VRgR3ZSr/iuKufFMLoW73Oh9lRMpKM0bB33fzfFwntSQlQDCRRFTHpRifbaKq65NcHe62twmVjcoxfWQia1HN3/ZoJ7ja6uZqqQjym+lFxVtSYKG12EsqI1amWtVK1Rz3ifa5wInuV3Lx6PPtPiORQQvD6GvR9hsvwVq/obk4ZCslf703ULi0v43GMJceeDlWghNZaEaIMlwWND8Ic3onP8MnS+OwE7UESRgyAmVnaa9MI+uk6onxxClZwHWq1KVoSSRcgLwWCI0AtwsiGe4nveGcPvfstg4WQwqhBOWtJnc6GN2/+S6bnDJCG1h9zilkk0tUCRtEz97Xk8Pd3G8OvL+MgHh9Tlap+kiAFMjs9imS0QkDRQkYDshAAVtDDJoc79/nqNXmzAjo7x+Qz1O5cgpKV0K0nLB8MZG+GictYwfkkVu65sY+1cTPbhC2BluYXr/jBRo5sB1i4NWm6IbFOZ0x5AIr+5N8dH3RFcu4MiTS2jLIWz13IUR8gUL5HTc6+ZVZKUm8/R+ocM2ROb0f7HDXAzRulVY0NMVyO77RyDHaoBfQSgHtLE63WdkrYlfopsENfur6C1EAwfPZDgihtaqDQqrCgrWLyXbDTWgFmqo3LTGmrXnIVrT6DxKSaZD+/hQh0kO7h4h0ueoIBP8769m+iwFtIhMsQVThMP2ty8VdUaCBsp1/wqksk8JLm61yDUGHL1YEUtF6R65B0nqMQa8wLvSd/Hmollp2ed5fIBXPnJJTx2l0ddiEcy8dKcwyfvJKF1+fACrfpTJpbLKrDDQzDkYVTP8Muz3HAUlUlCB51o5Rqtx70Z3KgSTiMENStU1yT9DZP5zwSaLLot3joA985GZPMrsFe1lNkMS2tTSbRg1PpLvHaOBnl8mEmaMBwkfCfG0P7PWaQ3riK9lFDM2IvVath2aYHOWgzi1lqGi65OlGmKFaJqwCvmpW5xJ+vIn98KP78RrecXsPild9H+9jA6j04qBTYfYj0yxCTVIqZrxPvRceSPjCP7cYLOE23kSRN+cQEFGc4WTFJHqfFZ0uM4PdIlVbeoXB7YSj43H+Q64/x+iJ9ZelvmjfTGK+AObaMsbHZMoeX2jl9mvHZDjkGNTcXoFqkMCix9jiuPtAiZDvKVReQnV5AdGoQ7shHFu6TQt7jpm6S9I07rFzfIjZjn3fwZZEvzsFwLW1kWMJvaBkOSybForwlHI1+ch1tqo5ghxCq55kLDfZyGboLuczTGFqv5xxF+tVt4f5fB/cpRXk+QvUg4VmU/i0t+zbC8EQ8wG6YSTLSe9Jm+0QrU2WqH9q5FBWbm4Igiu0XqfRfoT8KnWEb3++eo5Cot
TAaiIvnyIpVaQcJEiPdzHSEFF5KTN02kt88i2dcOI5p2F4WYX5NhB8Xb9KRlXHQpzy8Q95u4z5UrVIJ7Dg+gOEuSyGSa0ozVrQmdh7R1RlxT5YXrFlH83wjSDXGKQLkGPk+B9tAb38tC+8fOyjpudsZg4sU65m+it5hDLC2bvr/A0KcyureKwU/XsPoAE15DWIV7NBh0O0KKwTDTVJUlA+PDJbEIIft4ZvmCcPT/W0XruYwC0rugTG+cRn6Ke5/LVTHJF1IBSAeNTsvp5MGteIwfaGHmY4Mx6TAWWBvVrpqhcTfpgwVd6+n2bG0O1VzqGEKCyhta1Fa5wYzIRwt1aZBkBe3DVHa8huomQmZMatY1LZ+zmSY6nO7U5hhJZ1mHfrTNzy0tVQRCtcmthNgy8qECFRF4fB5VNlF2AzSTv/tKjsqADhbYrGRJgIYMpyQ1DBKXnSxMJ5h4Cr/C9zmt31Uxuth3GaCLzBcp7006yn6av4tQWkhPI6ySWyojGUzY5mzIzKJ0MsHnzzaVJs0EKZuUbHc4rY/ABqbzkzkM3gts+HNS9LWDKN4cw+Bt/J6VsWTs4y8TahVSbMYFOq0EK7MOg6NpUEA5N9dJgF8KDbSrrdGSpL+V4ZBsBIPLRtO8tWSrLgMwJ94W6fRXKSyhVv8AP9cC3+fzC8wRG7D85RFSLK9/aE55X/KFXWshf9dj+L4aZm9hgtxtUB1vYPFuqVAp8KAQAjD72Rzj3yIdM1n+9MccJmyXYk/qbGaEk69IH+C0bExHwzApW1lCZdRg9cuXYvUru6kA+b8IjY2tMXxcNXQGjULHJfnCImG3htW/IX4fMRpi9YtZGHZYPmcM8MU5ZCcW0ZleQTrFpMkmqXN6Bu2ZU8jfY098kUXjrgbzRY5sgcw1wudG2wyNDtpvNVH/zdB9vPF8V9vMnHuS5XIMb/T40TMDSOqxq9oiKZva89UkPeYn1tB9nYsRw55VanduHt1zM+i8tSLRiIGbqsinGYz0QLG8yoXPoZhd1e5q8H4q80YHnRmuwXqrOMWAn2a9VR2Af69AdqqF7hnCrE64tXOMfmEQm57agOoV5HnXQpEwe+9rYvxpzpFuJ4ngFJ56IMGGTVYJJfVW23b86Nk67vrXeQqQYPR+BvJVzMZ7ibGBFNkiLXySw6h6Fdm79FLKYGMKrjJJOTYXw5/eSGWXsPJkUz1XmRrEwC0VrcEqmxrYfsig9b22xk5lNwP3QyPaju48MsEYypU65bNrGe17k51VjH+NuUTgIx2ddGhNBn97Ecff3IFT73SxeZsJY8t7dn/Cy3zl7HsGX32xicnLCRG2lORBrD7J4Fqp090G9RsYuCkZaI3ZsUFGkdGbVJorYT5qBya08gzkRmxnQrdhhqOznKrT6tUwi0pTZmUWKgFsE633VRiUg0YTSEEqPu3g5hW6tdEE909uRqPO/JE77Y1TmdNI1pzYZvHsQzX88dMdjlGUXTH0GVIgs7OXEqAttQvdNsDsm8vMk4tnTvldq8XOLJlEMtxGhBrRxQTldH6jVW457BWl8kJrYR9LZc0vvtRAUjQfKGaVIArOhqrDBZ55YAK+SdarlCP+MJlRHvekukPfSTB7rBYsJZVhu07XyVDT9pr28J1Z3zj0mvqbNi3ClcWMDqpCH1kJgwgbuiw5vNB3W1HYGCvUnMR2knRsyELFe3xmVntmR0Ml9N7xlzfh3/+WcbDBhi6tPLu4e/dv+3I2L8YsWN9849gSOsuxK4oNj5YVYSxRdvKhrkec8dv13tqqpcNMSEoOJ7WHvvrH7OGl4xcd+BZxkFaeIwSDWXqiubwV93CUObmHeMnXzxaw3tAEDJL2sTTfwTNfGUGN5XARRx9hcp2tN8cWsdU0ejpTCu/j8ZPTd6tDrzDxliZmVV9MHrxR3ltctxuE13lmGsUxITdYozmgm+/CfuaFyT0IBaI9/yTIlick2pny95Ex4Nt/1cUP/nmCg9jQcSHCpsQ14kymHK+X
jZQJg1VVSC0kQy8bsjNC4RuGYOWYMHpOJ0s2Ch8hadjWrs7vwhcmOti2KxyIlOdz60dS8fzgvPEib9zGbuvv7l7DoSfHGPmpWlmtKzjtwcWFXjXOs1x5nGTCJFvrKHlOsRbGgWH86OKcqZxkmHh0EkoQySXV4XG8yqHBvbta2Dkl4/Vg5DDm9+cdT6nk/SePckPOVL2bDz702Sa++fvDqG8YZeYLxz/heChbh1KRx+MkH87ElP6CYOKR0u4hiONJjonHqAFwMW5yJIMkDTeJB1nrf/OODmFDGOcXHGn1/Zzngf6jVR2J88Ht2y0OkVbv2EoWeG0CtZEG+wYXBDExEJM45NVhfpxOqLfitDlu6uMYMdBuPIlh4EoZUx0awOyJSXzj1gYh08UCx/gbNzmNof7J4XnHX/Ga+fzOjx2jAlMXnpP1/yTc9PSMxIfFJ+6p4Dfu7HByF4a2RbfQDkkSlMjv4sFGSEZGPSOFn01lEhEn1aTOrF3F6TfqePmpKv7rcYf50w6bthht3vvZr1+W/jPr+PmwuXvqpocZTPdpIpEpQZ+25an6uubkjhWDRTb+W5mdJy9OcNm1wJ4rqdxmh6Ex1u41E6bIjqc8rJvaqyzaWJedOcoDErakM0cSHPsf/n7SabW7YQJ6jlYeHQHnH7/2jz4vPKLl50fNH0zeeB3d+sPeGXHfue3Pw14/A8hkrLPKcrwjR0m836WKnCSOyKUMlya8SkapsdlJGUfVmnRiYVyox6fW9M4lfCwngggmkFyk6PKwvPxrgbC/v0i/vWfq5od5NnBfUSYqH2ecfVr/vL9x6FkG0q8k8ShUYJ7G46UQJjaumejELfxuje0dbocgNnF+2r9vaPZ1vBgR0Qf1rz84/fh+BRu3O0gMH05NEvNgSfP+PDf2w6o/qGTzXIdUIQY4CeIJYhZO2TW7h3O03Mv1XGdCPl5DzOQuFnQlZNEL9aKnzPofkrjDdVQPanzKfy8t/qT94Y17/43pm1Mse3UYnOK8aD8/Ftb/aOPCv4HQQ27JpElpufL0PemdQ2sxF1mqnFgrw8bEmJh+9oFO8kIO0Cz99QHbvvPg9BPrf+zR/7N/6uap3JmD9N2VXHKfFnsuTJh7R1F97/0e6o+Tdag5hY4QRJpUNCsnNhyb2pgvUAQFbYn7+Hyyvs40vfYdDoyffXD6iRf69/p/CbMWUUVYM2EAAAAASUVORK5CYII=" }