From 7f4bf69f0558e9a14b55360e073b67c241d26586 Mon Sep 17 00:00:00 2001
From: Idriss Sbaaoui <112825897+6ba3i@users.noreply.github.com>
Date: Wed, 10 Jun 2026 11:44:22 +0800
Subject: [PATCH] Enhancement: slim Docker image, add .dockerignore, fix Go binary shipping (#15880)

### What problem does this PR solve?

The RAGFlow Docker image was 9.06 GB with build-only compiler packages leaking into the runtime, duplicate frontend source shipped alongside compiled assets, and no .dockerignore causing ~6 GB of unnecessary context transfer per build.

### Type of change

- [x] Performance Improvement
---
 .dockerignore | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++
 Dockerfile    | 30 +++++++++++++++++++------
 2 files changed, 85 insertions(+), 7 deletions(-)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000..c6517ce467
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,62 @@
+# RAGFlow .dockerignore
+# Reduces Docker build context sent to the daemon.
+# All excluded items are either rebuilt inside Docker, mounted from
+# infiniflow/ragflow_deps, or are local-only artifacts.
+
+# ── Python environments and caches ("**/" = match at any depth) ─────────────
+.venv/
+venv/
+**/__pycache__/
+**/*.pyc
+**/*.pyo
+**/*.egg-info/
+**/.pytest_cache/
+
+# ── Frontend dependencies and build outputs ─────────────────────────────────
+web/node_modules/
+web/dist/
+
+# ── Runtime logs (logs/ stays root-anchored so .git/logs is still copied) ───
+logs/
+**/*.log
+docker/ragflow-logs/
+
+# ── Docker runtime data ─────────────────────────────────────────────────────
+docker/data/
+docker/oceanbase/
+docker/seekdb/
+
+# ── Go and C++ build outputs ────────────────────────────────────────────────
+internal/cpp/build/
+internal/cpp/cmake-build-release/
+internal/cpp/cmake-build-debug/
+target/
+
+# ── Downloaded dependency artifacts (mounted from infiniflow/ragflow_deps) ──
+chrome-linux64-*
+chromedriver-linux64-*
+tika-server-standard-*.jar
+tika-server-standard-*.jar.md5
+cl100k_base.tiktoken
+libssl*.deb
+uv-*.tar.gz
+huggingface.co/
+nltk_data/
+9b5ad71b2ce5302211f9c61530b329a4922fc6a4
+
+# ── IDE and editor config (.DS_Store can appear in any directory) ───────────
+.idea/
+.vscode/
+.cursor/
+.trae/
+**/.DS_Store
+
+# ── Test and coverage artifacts ─────────────────────────────────────────────
+coverage/
+htmlcov/
+.coverage
+.hypothesis/
+.nox/
+
+# ── Docker env (contains secrets) ───────────────────────────────────────────
+docker/.env
diff --git a/Dockerfile b/Dockerfile
index af176fa664..1f81adb86b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -43,7 +43,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
     chmod 1777 /tmp && \
     apt update && \
     apt install -y \
-    build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive texlive-latex-extra texlive-xetex texlive-lang-chinese fonts-freefont-ttf fonts-noto-cjk postgresql-client
+    libglib2.0-0 libglx-mesa0 libgl1 pkg-config libgdiplus default-jdk libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive texlive-latex-extra texlive-xetex texlive-lang-chinese fonts-freefont-ttf fonts-noto-cjk postgresql-client && \
+    rm -rf /var/lib/apt/lists/*
 
 # Download resource from GitHub to /usr/share/infinity
 RUN mkdir -p /usr/share/infinity/resource && \
@@ -62,7 +63,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
     echo "deb [signed-by=/etc/apt/keyrings/nginx-archive-keyring.gpg] https://nginx.org/packages/mainline/ubuntu/ noble nginx" > /etc/apt/sources.list.d/nginx.list && \
     apt -o Acquire::Retries=5 update && \
     apt -o Acquire::Retries=5 install -y nginx=${NGINX_VERSION} && \
-    apt-mark hold nginx
+    apt-mark hold nginx && \
+    rm -rf /var/lib/apt/lists/*
 
 # Install uv
 RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
@@ -91,7 +93,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
     apt purge -y nodejs npm && \
     apt autoremove -y && \
     apt update && \
-    apt install -y nodejs
+    apt install -y nodejs && \
+    rm -rf /var/lib/apt/lists/*
 
 # Add msssql ODBC driver
 # macOS ARM64 environment, install msodbcsql18.
@@ -107,7 +110,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
     else \
         # x86_64 or others \
         ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
-    fi || \
+    fi && \
+    rm -rf /var/lib/apt/lists/* || \
     { echo "Failed to install ODBC driver"; exit 1; }
 
 
@@ -136,6 +140,13 @@ USER root
 
 WORKDIR /ragflow
 
+# Install build-only dependencies for compiling Python C extensions.
+# These are not inherited from base to keep the production image smaller.
+RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
+    apt update && \
+    apt install -y build-essential libpython3-dev libicu-dev libgbm-dev && \
+    rm -rf /var/lib/apt/lists/*
+
 # install dependencies from uv.lock file
 COPY pyproject.toml uv.lock ./
 
@@ -152,11 +163,17 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
     # Ensure pip is available in the venv for runtime package installation (fixes #12651)
     .venv/bin/python3 -m ensurepip --upgrade
 
+# Install frontend dependencies — depends only on package manifests so
+# web source / docs changes don't invalidate this layer.
+COPY web/package.json web/package-lock.json web/.npmrc ./web/
+RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
+    cd web && NODE_OPTIONS="--max-old-space-size=8192" npm install
+
+# Copy full web source and docs for the frontend build.
 COPY web web
 COPY docs docs
 RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
-    cd web && NODE_OPTIONS="--max-old-space-size=8192" npm install && \
-    NODE_OPTIONS="--max-old-space-size=8192" VITE_BUILD_SOURCEMAP=false VITE_MINIFY=esbuild npm run build
+    cd web && NODE_OPTIONS="--max-old-space-size=8192" VITE_BUILD_SOURCEMAP=false VITE_MINIFY=esbuild npm run build
 
 COPY .git /ragflow/.git
 
@@ -178,7 +195,6 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
 ENV PYTHONPATH=/ragflow/
 
-COPY web web
 COPY admin admin
 COPY api api
 COPY conf conf