fix(deepdoc): accept GFM table separators with one or more dashes (#16319)

This commit is contained in:
Harsh Kashyap
2026-06-25 16:55:57 +05:30
committed by GitHub
parent e290a0d23e
commit 66d86154ab
2 changed files with 13 additions and 1 deletion

View File

@@ -193,7 +193,7 @@ class MarkdownElementExtractor:
def _is_table_separator_row(self, line):
    """Return True when *line* looks like a GFM table delimiter row.

    A delimiter row needs at least two cells, and every cell (spaces
    removed) must consist of one or more dashes with an optional leading
    and/or trailing colon, e.g. ``-``, ``:--``, ``--:`` or ``:---:``.

    Args:
        line: A single line of Markdown text.

    Returns:
        bool: True if every cell matches the delimiter pattern.
    """
    # NOTE(review): _table_cells is defined elsewhere in the class; presumably
    # it splits the row into its cell strings - confirm against that helper.
    cells = self._table_cells(line)
    # GFM only requires one dash per delimiter cell, so short forms such as
    # ":--" must be accepted; the stricter three-dash check used to return
    # first and made this pattern unreachable.
    return len(cells) >= 2 and all(re.match(r"^:?-+:?$", cell.replace(" ", "")) for cell in cells)
def _markdown_table_ranges(self, text):
ranges = []

View File

@@ -153,6 +153,18 @@ class TestMarkdownElementExtractorTables:
"After",
]
@pytest.mark.p2
def test_custom_delimiter_preserves_gfm_short_separator_table(self, markdown_element_extractor):
    """A table whose separator cells have only two dashes stays one section under a custom delimiter."""
    # The table (header, ":--"/"--:" separator row, body row) must come back
    # as a single section between the surrounding text lines.
    expected_sections = [
        "Before",
        "| Name | Value |\n| :-- | --: |\n| A | 1 |",
        "After",
    ]
    extractor = markdown_element_extractor("Before\n| Name | Value |\n| :-- | --: |\n| A | 1 |\nAfter")
    assert extractor.extract_elements(delimiter="`\n`") == expected_sections
def test_custom_delimiter_preserves_html_table(self, markdown_element_extractor):
text = "Before\n<table>\n<tr><td>A</td></tr>\n<tr><td>B</td></tr>\n</table>\nAfter"