From 18cafff79046f02d56b618909ba646d1e82f543e Mon Sep 17 00:00:00 2001 From: Magicbook1108 Date: Fri, 10 Apr 2026 14:11:14 +0800 Subject: [PATCH] Fix: markdown parser in pipeline (#14032) ### What problem does this PR solve? Fix: markdown parser in pipeline ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/flow/parser/parser.py | 15 ++++++++++++++- web/src/pages/agent/constant/pipeline.tsx | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index 6f2f26d041..cf756649b7 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -936,6 +936,7 @@ class Parser(ProcessBase): delimiter=conf.get("delimiter"), return_section_images=True, ) + if conf.get("output_format") == "json": json_results = [] @@ -954,6 +955,16 @@ class Parser(ProcessBase): json_result["doc_type_kwd"] = "image" if json_result.get("image") is not None else "text" json_results.append(json_result) + for table in tables: + table_text = table[0][1] if table and table[0] else "" + if table_text: + json_results.append( + { + "text": table_text, + "doc_type_kwd": "table", + } + ) + enhance_media_sections_with_vision( json_results, self._canvas._tenant_id, @@ -962,7 +973,9 @@ class Parser(ProcessBase): ) self.set_output("json", json_results) else: - self.set_output("text", "\n".join([section_text for section_text, _ in sections])) + texts = [section_text for section_text, _ in sections if section_text] + texts.extend(table[0][1] for table in tables if table and table[0] and table[0][1]) + self.set_output("text", "\n".join(texts)) def _code(self, name, blob, **kwargs): """Parse text and source code files as plain text chunks.""" diff --git a/web/src/pages/agent/constant/pipeline.tsx b/web/src/pages/agent/constant/pipeline.tsx index 757c69c4b2..67eca573f3 100644 --- a/web/src/pages/agent/constant/pipeline.tsx +++ b/web/src/pages/agent/constant/pipeline.tsx @@ -39,7 +39,7 @@ export enum EmailOutputFormat { } export enum TextMarkdownOutputFormat { - Text = 'text', + Text = 'json', } export enum TextJsonOutputFormat {