From 90758724357856accc2e9e39ce2d79195c06a6ee Mon Sep 17 00:00:00 2001 From: Idriss Sbaaoui <112825897+6ba3i@users.noreply.github.com> Date: Thu, 30 Apr 2026 11:55:02 +0800 Subject: [PATCH] Fix: Manual/Naive outline tuple unpack crash (#14518) ### What problem does this PR solve? This fixes a crash in Manual and Naive parsing when PDF outlines include page numbers as a third tuple value. It makes outline unpacking accept extra values so parsing no longer fails. fixes #14411 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/app/manual.py | 2 +- rag/app/naive.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rag/app/manual.py b/rag/app/manual.py index b9afdbf725..b3f5f2edc1 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -270,7 +270,7 @@ def chunk(filename, binary=None, from_page=0, to_page=MAXIMUM_PAGE_NUMBER, lang= if res and pdf_parser and getattr(pdf_parser, "outlines", None): res[0]["__outline__"] = [ {"title": title, "depth": depth} - for title, depth in pdf_parser.outlines + for title, depth, *_ in pdf_parser.outlines ] return res diff --git a/rag/app/naive.py b/rag/app/naive.py index 513f503b65..90d1b42858 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -1133,7 +1133,7 @@ def chunk(filename, binary=None, from_page=0, to_page=MAXIMUM_PAGE_NUMBER, lang= if res and pdf_parser and getattr(pdf_parser, "outlines", None): res[0]["__outline__"] = [ {"title": title, "depth": depth} - for title, depth in pdf_parser.outlines + for title, depth, *_ in pdf_parser.outlines ] return res