From 3a829fb6dd404244944e00bc69e51abd7ac4812e Mon Sep 17 00:00:00 2001 From: Wang Qi Date: Fri, 26 Jun 2026 20:15:25 +0800 Subject: [PATCH] Fix VLM PDF parser parsing only the first 12 pages, and align the frontend default page range for PDF files with the backend (#16394) 1. Fix the VLM parser parsing only the first 12 pages. 2. Set the frontend default page range to 1 - 100000 to keep it aligned with the backend. --- deepdoc/parser/pdf_parser.py | 2 +- web/src/components/chunk-method-dialog/index.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index e409d5556b..cfe2c3e203 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -2053,7 +2053,7 @@ class VisionParser(RAGFlowPdfParser): all_docs = [] for idx, img_binary in enumerate(self.page_images or []): - pdf_page_num = idx # 0-based + pdf_page_num = from_page + idx # 0-based if pdf_page_num < start_page or pdf_page_num >= end_page: continue diff --git a/web/src/components/chunk-method-dialog/index.tsx b/web/src/components/chunk-method-dialog/index.tsx index 21650d7e6d..63aaec7503 100644 --- a/web/src/components/chunk-method-dialog/index.tsx +++ b/web/src/components/chunk-method-dialog/index.tsx @@ -247,7 +247,7 @@ export function ChunkMethodDialog({ pipeline_id: pipelineId || '', parseType: pipelineId ? ParseType.Pipeline : ParseType.BuiltIn, parser_config: fillDefaultParserValue({ - pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }], + pages: pages.length > 0 ? pages : [{ from: 1, to: 100000 }], ...omit(parserConfig, 'pages'), image_table_context_window: parserConfig?.image_table_context_window ??