mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Feat: add button to remove header & footer in pipeline (#14486)
### What problem does this PR solve?

Feat: add button to remove header & footer in pipeline

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@@ -52,7 +52,7 @@ class RAGFlowHtmlParser:
|
||||
raise TypeError("txt type should be string!")
|
||||
|
||||
temp_sections = []
|
||||
soup = BeautifulSoup(txt, "html5lib")
|
||||
soup = BeautifulSoup(txt, "html.parser")
|
||||
# delete <style> tag
|
||||
for style_tag in soup.find_all(["style", "script"]):
|
||||
style_tag.decompose()
|
||||
@@ -210,4 +210,3 @@ class RAGFlowHtmlParser:
|
||||
chunks.append(current_block)
|
||||
|
||||
return chunks
|
||||
|
||||
|
||||
Reference in New Issue
Block a user