From 534729546e961c2cc01066b6312d8fda39ca4f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E5=9C=A3=E7=A5=BA?= Date: Mon, 30 Mar 2026 13:17:32 +0800 Subject: [PATCH] fix(html-parser): correct h4 heading mapping from ##### to #### (#13833) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Fix incorrect Markdown heading mapping for `h4` in `TITLE_TAGS` dictionary - `h4` was mapped to `"#####"` (h5 level) instead of `"####"` (correct h4 level) Closes #13819 ## Details In `deepdoc/parser/html_parser.py`, the `TITLE_TAGS` dictionary had a typo where `h4` was assigned 5 `#` characters instead of 4, causing h4 headings to be converted to h5-level Markdown headings during HTML parsing. ## Test plan - [ ] Parse an HTML document containing `<h4>` tags and verify the output uses `####` (4 hashes) - [ ] Verify other heading levels remain correct 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Asksksn Co-authored-by: Claude Opus 4.6 --- deepdoc/parser/html_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepdoc/parser/html_parser.py b/deepdoc/parser/html_parser.py index dcf33a8bbd..f4d360c641 100644 --- a/deepdoc/parser/html_parser.py +++ b/deepdoc/parser/html_parser.py @@ -33,7 +33,7 @@ BLOCK_TAGS = [ "table", "pre", "code", "blockquote", "figure", "figcaption" ] -TITLE_TAGS = {"h1": "#", "h2": "##", "h3": "###", "h4": "#####", "h5": "#####", "h6": "######"} +TITLE_TAGS = {"h1": "#", "h2": "##", "h3": "###", "h4": "####", "h5": "#####", "h6": "######"} class RAGFlowHtmlParser: