From 17e3aad7aea27673afcd57976c201f09e9261190 Mon Sep 17 00:00:00 2001 From: Wang Qi Date: Tue, 16 Jun 2026 20:11:45 +0800 Subject: [PATCH] Revert "fix: paginate non-DeepDOC PDF parsing tasks to prevent OOM" (#16104) Reverts infiniflow/ragflow#15951 --- api/db/services/task_service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/api/db/services/task_service.py b/api/db/services/task_service.py index 20887cab0f..10ff8c7c29 100644 --- a/api/db/services/task_service.py +++ b/api/db/services/task_service.py @@ -390,14 +390,15 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int): if doc["type"] == FileType.PDF.value: file_bin = settings.STORAGE_IMPL.get(bucket, name) + do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC") pages = PdfParser.total_page_number(doc["name"], file_bin) if pages is None: pages = 0 page_size = doc["parser_config"].get("task_page_size") or 12 if doc["parser_id"] == "paper": page_size = doc["parser_config"].get("task_page_size") or 22 - if doc["parser_id"] in ["one", "knowledge_graph"] or doc["parser_config"].get("toc_extraction", False): - page_size = doc["parser_config"].get("task_page_size") or 30 + if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc_extraction", False): + page_size = MAXIMUM_TASK_PAGE_NUMBER page_ranges = doc["parser_config"].get("pages") or [(1, MAXIMUM_PAGE_NUMBER)] for s, e in page_ranges: s -= 1