Mirror of https://github.com/infiniflow/ragflow.git (synced 2026-06-29 23:41:12 +08:00)
fix(deepdoc): accept GFM table separators with one or more dashes (#16319)
This commit is contained in:
@@ -193,7 +193,7 @@ class MarkdownElementExtractor:
|
||||
|
||||
def _is_table_separator_row(self, line):
|
||||
cells = self._table_cells(line)
|
||||
- return len(cells) >= 2 and all(re.match(r"^:?-{3,}:?$", cell.replace(" ", "")) for cell in cells)
|
||||
+ return len(cells) >= 2 and all(re.match(r"^:?-+:?$", cell.replace(" ", "")) for cell in cells)
|
||||
|
||||
def _markdown_table_ranges(self, text):
|
||||
ranges = []
|
||||
|
||||
@@ -153,6 +153,18 @@ class TestMarkdownElementExtractorTables:
|
||||
"After",
|
||||
]
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_custom_delimiter_preserves_gfm_short_separator_table(self, markdown_element_extractor):
|
||||
text = "Before\n| Name | Value |\n| :-- | --: |\n| A | 1 |\nAfter"
|
||||
|
||||
sections = markdown_element_extractor(text).extract_elements(delimiter="`\n`")
|
||||
|
||||
assert sections == [
|
||||
"Before",
|
||||
"| Name | Value |\n| :-- | --: |\n| A | 1 |",
|
||||
"After",
|
||||
]
|
||||
|
||||
def test_custom_delimiter_preserves_html_table(self, markdown_element_extractor):
|
||||
text = "Before\n<table>\n<tr><td>A</td></tr>\n<tr><td>B</td></tr>\n</table>\nAfter"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user