From 66d86154ab5b739a2860edb2802dbc2653d856cc Mon Sep 17 00:00:00 2001 From: Harsh Kashyap Date: Thu, 25 Jun 2026 16:55:57 +0530 Subject: [PATCH] fix(deepdoc): accept GFM table separators with one or more dashes (#16319) --- deepdoc/parser/markdown_parser.py | 2 +- .../unit_test/deepdoc/parser/test_markdown_parser.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/deepdoc/parser/markdown_parser.py b/deepdoc/parser/markdown_parser.py index a79f552142..5acf7111bc 100644 --- a/deepdoc/parser/markdown_parser.py +++ b/deepdoc/parser/markdown_parser.py @@ -193,7 +193,7 @@ class MarkdownElementExtractor: def _is_table_separator_row(self, line): cells = self._table_cells(line) - return len(cells) >= 2 and all(re.match(r"^:?-{3,}:?$", cell.replace(" ", "")) for cell in cells) + return len(cells) >= 2 and all(re.match(r"^:?-+:?$", cell.replace(" ", "")) for cell in cells) def _markdown_table_ranges(self, text): ranges = [] diff --git a/test/unit_test/deepdoc/parser/test_markdown_parser.py b/test/unit_test/deepdoc/parser/test_markdown_parser.py index 9d8d6c53ed..e9c8a92413 100644 --- a/test/unit_test/deepdoc/parser/test_markdown_parser.py +++ b/test/unit_test/deepdoc/parser/test_markdown_parser.py @@ -153,6 +153,18 @@ class TestMarkdownElementExtractorTables: "After", ] + @pytest.mark.p2 + def test_custom_delimiter_preserves_gfm_short_separator_table(self, markdown_element_extractor): + text = "Before\n| Name | Value |\n| :-- | --: |\n| A | 1 |\nAfter" + + sections = markdown_element_extractor(text).extract_elements(delimiter="`\n`") + + assert sections == [ + "Before", + "| Name | Value |\n| :-- | --: |\n| A | 1 |", + "After", + ] + def test_custom_delimiter_preserves_html_table(self, markdown_element_extractor): text = "Before\n\n\n\n
A
B
\nAfter"