From 4e48aba5c433d6219f8294ded6ce9a665c1288d1 Mon Sep 17 00:00:00 2001 From: Enes Delibalta Date: Fri, 27 Feb 2026 15:13:50 +0300 Subject: [PATCH] fix: update DoclingParser return type hint (#13243) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? The _transfer_to_sections method raised a type-hint violation at runtime because it sometimes returns 3-item tuples, while its return annotation declared 2-item tuples (list[tuple[str, str]]). The return type hint is now list[tuple[str, ...]], which accepts string tuples of any length and prevents the runtime crash. Error: 20:53:21 Page(1~10): [ERROR]Internal server error while chunking: Method deepdoc.parser.docling_parser.DoclingParser._transfer_to_sections() return [(1. JIRA Nasıl Kullanılır?, text, @@1\t70.8\t194.9\t70.9\t85.5##), (1.1. Proje O...##)] violates type hint list[tuple[str, str]], as list index 15 item tuple tuple (Gelen ekran üzerinden alanları isterlerine göre doldurduğunuz taktirde Create düğmesi i...##) length 3 != 2. 20:53:21 [ERROR][Exception]: Method deepdoc.parser.docling_parser.DoclingParser._transfer_to_sections() return [('1. JIRA Nasıl Kullanılır?', 'text', '@@1\t70.8\t194.9\t70.9\t85.5##'), ('1.1. Proje O...##')] violates type hint list[tuple[str, str]], as list index 15 item tuple tuple ('Gelen ekran üzerinden alanları isterlerine göre doldurduğunuz taktirde Create düğmesi i...##') length 3 != 2. 
### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Co-authored-by: Enes Delibalta --- deepdoc/parser/docling_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepdoc/parser/docling_parser.py b/deepdoc/parser/docling_parser.py index e8df1cfd4e..4416a3f8f5 100644 --- a/deepdoc/parser/docling_parser.py +++ b/deepdoc/parser/docling_parser.py @@ -194,8 +194,8 @@ class DoclingParser(RAGFlowPdfParser): bbox = _BBox(int(pn), bb[0], bb[1], bb[2], bb[3]) yield (DoclingContentType.EQUATION.value, text, bbox) - def _transfer_to_sections(self, doc, parse_method: str) -> list[tuple[str, str]]: - sections: list[tuple[str, str]] = [] + def _transfer_to_sections(self, doc, parse_method: str) -> list[tuple[str, ...]]: + sections: list[tuple[str, ...]] = [] for typ, payload, bbox in self._iter_doc_items(doc): if typ == DoclingContentType.TEXT.value: section = payload.strip()