mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix: Manual/Naive outline tuple unpack crash (#14518)
### What problem does this PR solve?

This fixes a crash in Manual and Naive parsing when PDF outlines include page numbers as a third tuple value. It makes outline unpacking accept extra values so parsing no longer fails.

Fixes #14411

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -270,7 +270,7 @@ def chunk(filename, binary=None, from_page=0, to_page=MAXIMUM_PAGE_NUMBER, lang=
|
||||
if res and pdf_parser and getattr(pdf_parser, "outlines", None):
|
||||
res[0]["__outline__"] = [
|
||||
{"title": title, "depth": depth}
|
||||
for title, depth in pdf_parser.outlines
|
||||
for title, depth, *_ in pdf_parser.outlines
|
||||
]
|
||||
return res
|
||||
|
||||
|
||||
@@ -1133,7 +1133,7 @@ def chunk(filename, binary=None, from_page=0, to_page=MAXIMUM_PAGE_NUMBER, lang=
|
||||
if res and pdf_parser and getattr(pdf_parser, "outlines", None):
|
||||
res[0]["__outline__"] = [
|
||||
{"title": title, "depth": depth}
|
||||
for title, depth in pdf_parser.outlines
|
||||
for title, depth, *_ in pdf_parser.outlines
|
||||
]
|
||||
|
||||
return res
|
||||
|
||||
Reference in New Issue
Block a user