mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Revert "fix: paginate non-DeepDOC PDF parsing tasks to prevent OOM" (#16104)
Reverts infiniflow/ragflow#15951
This commit is contained in:
@@ -390,14 +390,15 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
|
||||
|
||||
if doc["type"] == FileType.PDF.value:
|
||||
file_bin = settings.STORAGE_IMPL.get(bucket, name)
|
||||
do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC")
|
||||
pages = PdfParser.total_page_number(doc["name"], file_bin)
|
||||
if pages is None:
|
||||
pages = 0
|
||||
page_size = doc["parser_config"].get("task_page_size") or 12
|
||||
if doc["parser_id"] == "paper":
|
||||
page_size = doc["parser_config"].get("task_page_size") or 22
|
||||
if doc["parser_id"] in ["one", "knowledge_graph"] or doc["parser_config"].get("toc_extraction", False):
|
||||
page_size = doc["parser_config"].get("task_page_size") or 30
|
||||
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc_extraction", False):
|
||||
page_size = MAXIMUM_TASK_PAGE_NUMBER
|
||||
page_ranges = doc["parser_config"].get("pages") or [(1, MAXIMUM_PAGE_NUMBER)]
|
||||
for s, e in page_ranges:
|
||||
s -= 1
|
||||
|
||||
Reference in New Issue
Block a user