diff --git a/Dockerfile b/Dockerfile index 1798933d06..b89cb266a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,16 +19,16 @@ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co # This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ cp -r /deps/nltk_data /root/ && \ - cp /deps/tika-server-standard-3.2.3.jar /deps/tika-server-standard-3.2.3.jar.md5 /ragflow/ && \ + cp /deps/tika-server-standard-3.3.0.jar /deps/tika-server-standard-3.3.0.jar.md5 /ragflow/ && \ cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 -ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.2.3.jar" +ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.3.0.jar" ENV DEBIAN_FRONTEND=noninteractive # Setup apt # Python package and implicit dependencies: # opencv-python: libglib2.0-0 libglx-mesa0 libgl1 -# python-pptx: default-jdk tika-server-standard-3.2.3.jar +# python-pptx: default-jdk tika-server-standard-3.3.0.jar # selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85 # Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ diff --git a/Dockerfile.deps b/Dockerfile.deps index 591b99eb83..8444e4a2c0 100644 --- a/Dockerfile.deps +++ b/Dockerfile.deps @@ -3,7 +3,7 @@ FROM scratch # Copy resources downloaded via download_deps.py -COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.2.3.jar tika-server-standard-3.2.3.jar.md5 libssl*.deb uv-x86_64-unknown-linux-gnu.tar.gz uv-aarch64-unknown-linux-gnu.tar.gz / +COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.3.0.jar tika-server-standard-3.3.0.jar.md5 libssl*.deb uv-x86_64-unknown-linux-gnu.tar.gz uv-aarch64-unknown-linux-gnu.tar.gz / COPY nltk_data /nltk_data diff --git a/download_deps.py b/download_deps.py index cbaf0a6373..b707e03622 100644 --- a/download_deps.py +++ b/download_deps.py @@ -23,8 +23,8 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]: return [ "http://mirrors.tuna.tsinghua.edu.cn/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb", "http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb", - "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.2.3/tika-server-standard-3.2.3.jar", - "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.2.3/tika-server-standard-3.2.3.jar.md5", + "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.3.0/tika-server-standard-3.3.0.jar", + "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.3.0/tika-server-standard-3.3.0.jar.md5", "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken", ["https://registry.npmmirror.com/-/binary/chrome-for-testing/121.0.6167.85/linux64/chrome-linux64.zip", "chrome-linux64-121-0-6167-85"], ["https://registry.npmmirror.com/-/binary/chrome-for-testing/121.0.6167.85/linux64/chromedriver-linux64.zip", "chromedriver-linux64-121-0-6167-85"], @@ -35,8 +35,8 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]: return [ "http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb", "http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb", - "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.2.3/tika-server-standard-3.2.3.jar", - "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.2.3/tika-server-standard-3.2.3.jar.md5", + "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.3.0/tika-server-standard-3.3.0.jar", + "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.3.0/tika-server-standard-3.3.0.jar.md5", "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken", ["https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chrome-linux64.zip", "chrome-linux64-121-0-6167-85"], ["https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chromedriver-linux64.zip", "chromedriver-linux64-121-0-6167-85"],