Fix: markdown parser in pipeline (#14032)

### What problem does this PR solve?

Fix: markdown parser in pipeline

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-06-29 23:41:12 +08:00 · 2026-04-10 14:11:14 +08:00
parent 9ce293a736
commit 18cafff790
2 changed files with 15 additions and 2 deletions
--- a/rag/flow/parser/parser.py
+++ b/rag/flow/parser/parser.py
@@ -936,6 +936,7 @@ class Parser(ProcessBase):
            delimiter=conf.get("delimiter"),
            return_section_images=True,
        )
+
        if conf.get("output_format") == "json":
            json_results = []

@@ -954,6 +955,16 @@ class Parser(ProcessBase):
                json_result["doc_type_kwd"] = "image" if json_result.get("image") is not None else "text"
                json_results.append(json_result)

+            for table in tables:
+                table_text = table[0][1] if table and table[0] else ""
+                if table_text:
+                    json_results.append(
+                        {
+                            "text": table_text,
+                            "doc_type_kwd": "table",
+                        }
+                    )
+
            enhance_media_sections_with_vision(
                json_results,
                self._canvas._tenant_id,
@@ -962,7 +973,9 @@ class Parser(ProcessBase):
            )
            self.set_output("json", json_results)
        else:
-            self.set_output("text", "\n".join([section_text for section_text, _ in sections]))
+            texts = [section_text for section_text, _ in sections if section_text]
+            texts.extend(table[0][1] for table in tables if table and table[0] and table[0][1])
+            self.set_output("text", "\n".join(texts))

    def _code(self, name, blob, **kwargs):
        """Parse text and source code files as plain text chunks."""
--- a/web/src/pages/agent/constant/pipeline.tsx
+++ b/web/src/pages/agent/constant/pipeline.tsx
@@ -39,7 +39,7 @@ export enum EmailOutputFormat {
 }

 export enum TextMarkdownOutputFormat {
-  Text = 'text',
+  Text = 'json',
 }

 export enum TextJsonOutputFormat {