mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix: markdown parser in pipeline (#14032)
### What problem does this PR solve?

Fix: markdown parser in pipeline

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -936,6 +936,7 @@ class Parser(ProcessBase):
|
||||
delimiter=conf.get("delimiter"),
|
||||
return_section_images=True,
|
||||
)
|
||||
|
||||
if conf.get("output_format") == "json":
|
||||
json_results = []
|
||||
|
||||
@@ -954,6 +955,16 @@ class Parser(ProcessBase):
|
||||
json_result["doc_type_kwd"] = "image" if json_result.get("image") is not None else "text"
|
||||
json_results.append(json_result)
|
||||
|
||||
for table in tables:
|
||||
table_text = table[0][1] if table and table[0] else ""
|
||||
if table_text:
|
||||
json_results.append(
|
||||
{
|
||||
"text": table_text,
|
||||
"doc_type_kwd": "table",
|
||||
}
|
||||
)
|
||||
|
||||
enhance_media_sections_with_vision(
|
||||
json_results,
|
||||
self._canvas._tenant_id,
|
||||
@@ -962,7 +973,9 @@ class Parser(ProcessBase):
|
||||
)
|
||||
self.set_output("json", json_results)
|
||||
else:
|
||||
self.set_output("text", "\n".join([section_text for section_text, _ in sections]))
|
||||
texts = [section_text for section_text, _ in sections if section_text]
|
||||
texts.extend(table[0][1] for table in tables if table and table[0] and table[0][1])
|
||||
self.set_output("text", "\n".join(texts))
|
||||
|
||||
def _code(self, name, blob, **kwargs):
|
||||
"""Parse text and source code files as plain text chunks."""
|
||||
|
||||
@@ -39,7 +39,7 @@ export enum EmailOutputFormat {
|
||||
}
|
||||
|
||||
export enum TextMarkdownOutputFormat {
|
||||
Text = 'text',
|
||||
Text = 'json',
|
||||
}
|
||||
|
||||
export enum TextJsonOutputFormat {
|
||||
|
||||
Reference in New Issue
Block a user