diff --git a/pyproject.toml b/pyproject.toml index 0665a1c536..73006ac28f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,8 +81,7 @@ dependencies = [ "pyobvector==0.2.22", "pyodbc>=5.2.0,<6.0.0", "pypandoc>=1.16", - "pypdf>=6.6.2", - "pypdf2>=3.0.1,<4.0.0", + "pypdf>=6.7.5", "python-calamine>=0.4.0", "python-docx>=1.1.2,<2.0.0", "python-pptx>=1.0.2,<2.0.0", diff --git a/rag/app/presentation.py b/rag/app/presentation.py index 909fd61a30..390955041a 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -20,7 +20,7 @@ import re from collections import defaultdict from io import BytesIO -from PyPDF2 import PdfReader as pdf2_read +from pypdf import PdfReader as pdf2_read from deepdoc.parser import PdfParser, PlainParser from deepdoc.parser.ppt_parser import RAGFlowPptParser diff --git a/rag/utils/file_utils.py b/rag/utils/file_utils.py index 8d19079b76..c9ec50a36a 100644 --- a/rag/utils/file_utils.py +++ b/rag/utils/file_utils.py @@ -21,7 +21,7 @@ import requests from requests.exceptions import Timeout, RequestException from io import BytesIO from typing import List, Union, Tuple, Optional, Dict -import PyPDF2 +import pypdf as PyPDF2 from docx import Document import olefile diff --git a/uv.lock b/uv.lock index 0b1423a014..3432723677 100644 --- a/uv.lock +++ b/uv.lock @@ -5760,20 +5760,11 @@ wheels = [ [[package]] name = "pypdf" -version = "6.7.4" +version = "6.7.5" source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/dc/f52deef12797ad58b88e4663f097a343f53b9361338aef6573f135ac302f/pypdf-6.7.4.tar.gz", hash = "sha256:9edd1cd47938bb35ec87795f61225fd58a07cfaf0c5699018ae1a47d6f8ab0e3", size = 5304821, upload-time = "2026-02-27T10:44:39.395Z" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/52/37cc0aa9e9d1bf7729a737a0d83f8b3f851c8eb137373d9f71eafb0a3405/pypdf-6.7.5.tar.gz", hash = "sha256:40bb2e2e872078655f12b9b89e2f900888bb505e88a82150b64f9f34fa25651d", size = 5304278, upload-time = "2026-03-02T09:05:21.464Z" } wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/be/cded021305f5c81b47265b8c5292b99388615a4391c21ff00fd538d34a56/pypdf-6.7.4-py3-none-any.whl", hash = "sha256:527d6da23274a6c70a9cb59d1986d93946ba8e36a6bc17f3f7cce86331492dda", size = 331496, upload-time = "2026-02-27T10:44:37.527Z" }, -] - -[[package]] -name = "pypdf2" -version = "3.0.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/bb/18dc3062d37db6c491392007dfd1a7f524bb95886eb956569ac38a23a784/PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440", size = 227419, upload-time = "2022-12-31T10:36:13.13Z" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572, upload-time = "2022-12-31T10:36:10.327Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/89/336673efd0a88956562658aba4f0bbef7cb92a6fbcbcaf94926dbc82b408/pypdf-6.7.5-py3-none-any.whl", hash = "sha256:07ba7f1d6e6d9aa2a17f5452e320a84718d4ce863367f7ede2fd72280349ab13", size = 331421, upload-time = "2026-03-02T09:05:19.722Z" }, ] [[package]] @@ -6323,7 +6314,6 @@ dependencies = [ { name = "pyodbc" }, { name = "pypandoc" }, { name = "pypdf" }, - { name = "pypdf2" }, { name = "python-calamine" }, { name = "python-docx" }, { name = "python-gitlab" }, @@ -6462,8 +6452,7 @@ requires-dist = [ { name = "pyobvector", specifier = "==0.2.22" }, { name = "pyodbc", specifier = ">=5.2.0,<6.0.0" }, { name = "pypandoc", specifier = ">=1.16" }, - { name = "pypdf", specifier = ">=6.6.2" }, - { name = "pypdf2", specifier = ">=3.0.1,<4.0.0" }, + { name = "pypdf", specifier = ">=6.7.5" }, { name = "python-calamine", specifier = ">=0.4.0" }, { name = "python-docx", specifier = ">=1.1.2,<2.0.0" }, { name = "python-gitlab", specifier = ">=7.0.0" },