2024-09-29 10:51:46 +08:00
# base stage
2025-12-09 19:55:25 +08:00
FROM ubuntu:24.04 AS base
2024-09-29 10:51:46 +08:00
USER root
2024-11-22 20:24:32 +08:00
# Run every shell-form RUN under bash with pipefail so that a failure on the
# left-hand side of a pipe (curl | gpg, curl | bash, tar | tar) fails the
# build step instead of being masked by the last command's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
2024-09-29 10:51:46 +08:00
2024-12-06 20:47:22 +08:00
# Build-time switch. With --build-arg NEED_MIRROR=1 the steps below that test
# "$NEED_MIRROR" use mirror hosts (Aliyun for apt and PyPI, Gitee for git,
# npmmirror for uv's Python downloads) instead of the upstream ones.
ARG NEED_MIRROR=0
2024-04-16 12:29:58 +08:00
WORKDIR /ragflow
Feature rtl support (#13118)
### What problem does this PR solve?
This PR adds comprehensive **Right-to-Left (RTL) language support**,
primarily targeting Arabic and other RTL scripts (Hebrew, Persian, Urdu,
etc.).
Previously, RTL content had multiple rendering issues:
- Incorrect sentence splitting for Arabic punctuation in citation logic
- Misaligned text in chat messages and markdown components
- Improper positioning of blockquotes and “think” sections
- Incorrect table alignment
- Citation placement ambiguity in RTL prompts
- UI layout inconsistencies when mixing LTR and RTL text
This PR introduces backend and frontend improvements to properly detect,
render, and style RTL content while preserving existing LTR behavior.
#### Backend
- Updated sentence boundary regex in `rag/nlp/search.py` to include
Arabic punctuation:
- `،` (comma)
- `؛` (semicolon)
- `؟` (question mark)
- `۔` (Arabic full stop)
- Ensures citation insertion works correctly in RTL sentences.
- Updated citation prompt instructions to clarify citation placement
rules for RTL languages.
#### Frontend
- Introduced a new utility: `text-direction.ts`
- Detects text direction based on Unicode ranges.
- Supports Arabic, Hebrew, Syriac, Thaana, and related scripts.
- Provides `getDirAttribute()` for automatic `dir` assignment.
- Applied dynamic `dir` attributes across:
- Markdown rendering
- Chat messages
- Search results
- Tables
- Hover cards and reference popovers
- Added proper RTL styling in LESS:
- Text alignment adjustments
- Blockquote border flipping
- Section indentation correction
- Table direction switching
- Use of `<bdi>` for figure labels to prevent bidirectional conflicts
#### DevOps / Environment
- Added Windows backend launch script with retry handling.
- Updated dependency metadata.
- Adjusted development-only React debugging behavior.
---
### Type of change
- [x] Bug Fix (non-breaking change which fixes RTL rendering and
citation issues)
- [x] New Feature (non-breaking change which adds RTL detection and
dynamic direction handling)
---------
Co-authored-by: 6ba3i <isbaaoui09@gmail.com>
Co-authored-by: Ahmad Intisar <ahmadintisar@Ahmads-MacBook-M4-Pro.local>
Co-authored-by: Ahmad Intisar <168020872+ahmadintisar@users.noreply.github.com>
Co-authored-by: Liu An <asiro@qq.com>
2026-03-02 08:03:44 +03:00
# copy models downloaded via download_deps.py
2024-12-06 14:05:30 +08:00
# Create the model resource directory and /root/.ragflow.
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
# Stream the two model directories out of the bind-mounted deps image and
# unpack them flat into rag/res/deepdoc. --strip-components=3 drops the
# leading huggingface.co/InfiniFlow/<model>/ path segments and
# --exclude='.*' skips hidden files.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    tar --exclude='.*' -cf - \
        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
        /huggingface.co/InfiniFlow/deepdoc \
        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
2024-09-29 10:51:46 +08:00
2024-12-06 14:05:30 +08:00
# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
# Stage offline resources from the deps image: the NLTK data directory, the
# Tika server jar together with its .md5 file, and the cl100k_base tiktoken
# file, which is stored under a hash-like filename
# (presumably the cache key tiktoken looks up -- TODO confirm).
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    cp -r /deps/nltk_data /root/ && \
    cp /deps/tika-server-standard-3.3.0.jar /deps/tika-server-standard-3.3.0.jar.md5 /ragflow/ && \
    cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
2026-04-13 01:01:08 -07:00
# Point python-tika at the jar copied into /ragflow above so it does not try
# to download one.
ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.3.0.jar"
# Keep apt/dpkg from prompting during the package installs below.
# NOTE(review): as an ENV this also persists into the runtime environment.
ENV DEBIAN_FRONTEND=noninteractive
2024-12-06 14:05:30 +08:00
# Setup apt
2024-12-10 16:32:58 +08:00
# Python package and implicit dependencies:
# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
2026-04-13 01:01:08 -07:00
# python-pptx: default-jdk tika-server-standard-3.3.0.jar
2024-12-10 16:32:58 +08:00
# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
2024-12-06 14:05:30 +08:00
# Install the base OS packages in two phases:
#   1. bootstrap ca-certificates from the stock sources (the trailing ';'
#      means a failure here does not abort the step),
#   2. optionally rewrite the Ubuntu sources to the Aliyun mirror, then
#      install the full package set.
# docker-clean is removed and Keep-Downloaded-Packages is enabled so the
# downloaded .deb files stay in the /var/cache/apt cache mount between builds.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    apt update && \
    apt --no-install-recommends install -y ca-certificates; \
    if [ "$NEED_MIRROR" == "1" ]; then \
        # CI runners may inject a proxy whose TLS certificate is not trusted inside
        # the fresh Ubuntu base image yet. Keep the Ubuntu mirror on HTTP here so
        # the mirror switch remains usable before the full CA store is available.
        sed -i 's|http://archive.ubuntu.com/ubuntu|http://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
        sed -i 's|http://security.ubuntu.com/ubuntu|http://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
    fi; \
    rm -f /etc/apt/apt.conf.d/docker-clean && \
    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
    chmod 1777 /tmp && \
    apt update && \
    apt install -y \
        libglib2.0-0 libglx-mesa0 libgl1 pkg-config libgdiplus default-jdk \
        libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils libjemalloc-dev \
        gnupg unzip curl wget git vim less ghostscript pandoc \
        texlive texlive-latex-extra texlive-xetex texlive-lang-chinese \
        fonts-freefont-ttf fonts-noto-cjk postgresql-client && \
    rm -rf /var/lib/apt/lists/*
2024-12-06 20:47:22 +08:00
2026-03-06 16:56:12 +08:00
# Download resource from GitHub to /usr/share/infinity
# Shallow-clone the resource repository (Gitee when NEED_MIRROR=1, GitHub
# otherwise), copy its contents into /usr/share/infinity/resource and remove
# the temporary checkout in the same layer.
RUN mkdir -p /usr/share/infinity/resource && \
    if [ "$NEED_MIRROR" == "1" ]; then \
        git clone --depth 1 --single-branch https://gitee.com/infiniflow/resource /tmp/resource; \
    else \
        git clone --depth 1 --single-branch https://github.com/infiniflow/resource.git /tmp/resource; \
    fi && \
    cp -r /tmp/resource/* /usr/share/infinity/resource && \
    rm -rf /tmp/resource
2026-05-20 10:47:39 +09:00
# Exact nginx package version installed from the nginx.org mainline
# repository for Ubuntu noble; override with --build-arg NGINX_VERSION=...
ARG NGINX_VERSION=1.31.0-1~noble
# Register the nginx.org signing key and repository, install the pinned
# version, and hold the package so later apt operations do not replace it.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    mkdir -p /etc/apt/keyrings && \
    curl --retry 5 --retry-delay 2 --retry-all-errors -fsSL https://nginx.org/keys/nginx_signing.key | gpg --dearmor -o /etc/apt/keyrings/nginx-archive-keyring.gpg && \
    echo "deb [signed-by=/etc/apt/keyrings/nginx-archive-keyring.gpg] https://nginx.org/packages/mainline/ubuntu/ noble nginx" > /etc/apt/sources.list.d/nginx.list && \
    apt -o Acquire::Retries=5 update && \
    apt -o Acquire::Retries=5 install -y nginx=${NGINX_VERSION} && \
    apt-mark hold nginx && \
    rm -rf /var/lib/apt/lists/*
2026-02-06 12:55:06 +08:00
2025-12-09 16:23:37 +08:00
# Install uv
# With NEED_MIRROR=1, write /etc/uv/uv.toml so uv downloads Python builds
# from npmmirror and uses the Aliyun PyPI index by default. Then unpack the
# uv tarball shipped in the deps image (any architecture other than x86_64 is
# treated as aarch64), copy its binaries to /usr/local/bin and install
# Python 3.13 through uv.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    if [ "$NEED_MIRROR" == "1" ]; then \
        mkdir -p /etc/uv && \
        echo 'python-install-mirror = "https://registry.npmmirror.com/-/binary/python-build-standalone/"' > /etc/uv/uv.toml && \
        echo '[[index]]' >> /etc/uv/uv.toml && \
        echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \
        echo 'default = true' >> /etc/uv/uv.toml; \
    fi; \
    arch="$(uname -m)"; \
    if [ "$arch" = "x86_64" ]; then uv_arch="x86_64"; else uv_arch="aarch64"; fi; \
    tar xzf "/deps/uv-${uv_arch}-unknown-linux-gnu.tar.gz" \
    && cp "uv-${uv_arch}-unknown-linux-gnu/"* /usr/local/bin/ \
    && rm -rf "uv-${uv_arch}-unknown-linux-gnu" \
    && uv python install 3.13
2024-09-29 18:24:24 +08:00
2026-03-16 15:03:25 +08:00
# Python: do not write .pyc files. .NET: run in globalization-invariant mode.
# uv: HTTP timeout and retry count for its network requests.
ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 \
    UV_HTTP_TIMEOUT=200 \
    UV_HTTP_RETRIES=3
# Put /root/.local/bin first on PATH
# (presumably where uv places the Python executables it installs -- TODO confirm).
ENV PATH=/root/.local/bin:$PATH
2024-09-29 10:51:46 +08:00
2024-11-19 18:25:04 +08:00
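# The distro-packaged nodejs is too old (this note was written for nodejs 12.22 on Ubuntu 22.04; the base image is now ubuntu:24.04), so install Node.js 20.x from NodeSource instead.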
2024-12-06 14:05:30 +08:00
# Register the NodeSource 20.x repository, remove any distro-provided
# nodejs/npm packages, then install nodejs from the newly added repository.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt purge -y nodejs npm && \
    apt autoremove -y && \
    apt update && \
    apt install -y nodejs && \
    rm -rf /var/lib/apt/lists/*
2025-01-14 11:49:43 +08:00
feat(agent): align Go agent behavior with Python (except retrieval component) (#16225)
## Summary
Aligns the **Go agent runtime/canvas/components/tools** behavior with
the **Python `agent/` implementation** so the same stored canvas DSL
produces the same execution result on either side. Every component,
tool, and runtime primitive in `internal/agent/` is now driven by the
same semantics as its Python counterpart — variable resolution, template
substitution, control flow, error reporting, retry/cancel, and stream
event shapes.
The **retrieval component is the one explicit exception** in this PR. It
is being reworked in a separate change and is excluded from this
alignment pass; the wrapper slot (`universe_a_wrappers.go →
newRetrievalComponent`) is preserved.
## Scope of alignment
### Components (all aligned with `agent/component/`)
`Begin` · `Message` · `LLM` (incl. ChatTemplateKwargs,
MessageHistoryWindowSize, VisualFiles, Cite, OutputStructure,
JSONOutput, TopP, MaxRetries, DelayAfterError, credentials) · `Agent`
(react + tool artifact capture + `Reset()` interface-assert) · `Switch`
(12/12 operators, Python-equivalent semantics) · `Categorize` · `Invoke`
· `Iteration` · `Loop` (macro-expansion through `workflowx.AddLoopNode`)
· `UserFillUp` (Python-equivalent interrupt/resume via eino
`compose.Interrupt`/`ResumeWithData`) · `FillUp` · `DataOperations` ·
`ListOperations` · `StringTransform` · `VariableAggregator` ·
`VariableAssigner` · `Browser` (full stagehand runtime parity) ·
`DocsGenerator` · `ExcelProcessor`.
### Tools (all aligned with `agent/tools/`)
`Retrieval` (wrapper slot only — logic out of scope) · `MCPToolAdapter`
(streamable-HTTP) · `CodeExec` (sandbox bridge with
`code_exec_contract.go` matching Python contract) · `AkShare` · `ArXiv`
· `Crawler` · `DeepL` · `DuckDuckGo` · `Email` · `ExeSQL` · `GitHub` ·
`Google` · `GoogleScholar` · `Jin10` · `PubMed` · `QWeather` · `SearXNG`
· `Tavily` · `Tushare` · `Wencai` · `Wikipedia` · `YahooFinance` —
uniform `eino tool.InvokableTool` interface, SSRF protection, shared
HTTP client.
### Canvas execution engine (`internal/agent/canvas/`)
Aligned with Python's `agent/canvas.py`:
- **Scheduler** (`scheduler.go`): state pre/post handlers, node lambdas,
per-component timeout resolver (4-level: per-class env → per-class table
→ uniform env → 600s fallback), `legacyNoOpNames`.
- **Loop subgraph** (`loop_subgraph.go`): Python-equivalent
`AddLoopNode` macro expansion + condition translation.
- **Multibranch** (`multibranch.go`): `Switch` / `Categorize` routing
via `compose.NewGraphMultiBranch` — same branch selection semantics as
Python.
- **Parallel subgraph** (`parallel_subgraph.go`): matches Python's
parallel fan-out contract.
- **Interrupt/Resume** (`interrupt_resume.go`): `UserFillUpNodeBody` /
`IsInterruptError` / `ExtractInterruptContexts` — replaces the
deprecated Python sentinel chain with eino's native interrupt API,
preserving the same external behavior.
- **Checkpoint** (`checkpoint_store.go`): `RedisCheckPointStore`
Get/Set/Delete, with business metadata (status / canvas_id /
parent_run_id) on a parallel Redis Hash.
- **RunTracker** (`run_tracker.go`): Start / MarkSucceeded / MarkFailed
/ MarkCancelled / AttachCheckpoint — same lifecycle as the Python run
record.
- **Cancel** (`cancel.go`): Redis pub/sub watch.
- **Stream** (`stream.go`): SSE channel with `messages` / `waiting` /
`errors` / `done` events, same shape as Python's `agent.canvas.RunEvent`
payload.
### DSL bridge (`internal/agent/dsl/`)
- `normalize.go`: v1↔v2 collapsed into a single wire format — Python and
Go consume the same stored JSON.
- `reset.go`: per-run state reset matches Python's `Canvas.reset()`
semantics.
- Testdata mirrors Python's `agent_msg.json` / `all.json` / etc.
### Runtime (`internal/agent/runtime/`)
- `CanvasState` / `NewCanvasState` / `GetVar` / `SetVar` / `ReadVars`:
same `{{cpn_id@param}}` resolution model.
- `ResolveTemplate` (regex fast path + gonja fallback) — Python
Jinja-style semantics.
- `selector.go`, `metrics.go`, `component.go`: shared runtime contracts.
## Out of scope (intentionally)
- **`Retrieval` component logic** — wrapped only; full parity lands in a
follow-up PR.
- **Frontend** — only minor dsl-bridge / canvas UX fixes ride along.
- **CLI / admin / model registry** — orthogonal to agent behavior.
## How alignment is verified
`internal/service/agent_run_e2e_test.go` exercises the **full production
chain** against real Python-shaped DSL fixtures:
```
loadCanvasForUser → versionDAO.GetLatest → decodeCanvasFromDSL →
canvas.Compile → cc.Workflow.Invoke → answer extraction
```
using in-memory SQLite + miniredis (no Docker). Covers:
- `TestRunAgent_RealCanvas_BeginMessage` — happy path, `{{sys.query}}`
resolution
- `TestRunAgent_RealCanvas_WaitForUserResume` — two-run resume cycle
(Python-equivalent)
- `TestRunAgent_RealCanvas_CompileFails` — unknown component name →
sanitized error (Python-equivalent)
- `TestRunAgent_RealCanvas_InvokeFails` — unresolvable template ref
(Python-equivalent)
- `TestRunAgent_RunTracker_AttachCheckpoint_CallSequence` —
Start→AttachCheckpoint→MarkSucceeded lifecycle
`internal/handler/agent_test.go` — SSE streaming parity (`Content-Type:
text/event-stream`, `data: {…}\n\n`, trailing `data: [DONE]\n\n`,
OpenAI-compatible non-stream `choices`).
`internal/agent/canvas/fixture_compile_test.go` + per-component tests
pin the Python-equivalent outputs.
```
go test -count=1 -v -run 'TestRunAgent_RealCanvas|TestRunAgent_RunTracker' ./internal/service/
```
## Design reference
`docs/develop/agent-go-port-design.md` (1329 lines, last cross-checked
2026-06-17) — module layout, per-component / per-tool inventory,
corner-case catalogue, and the actionable backlog (Section 14, including
the retrieval alignment follow-up).
---------
Co-authored-by: Claude <noreply@anthropic.com>
2026-06-22 11:58:29 +08:00
# stagehand-server-v3 (Node.js SEA binary used by Browser component
# in local mode).
#
# The `v3.21.0` value below is the `stagehand-go/v3` Go module
# version pinned in `go.mod`. It is used here only to compute the
# `go_<ver>/` subdirectory that `local.go:cacheDir()` will look in
# for the binary at runtime — that subdirectory name is keyed by
# the Go module's own `internal.PackageVersion`, NOT by the server
# binary's release tag.
#
# The server binary itself is fetched separately by `download_deps.py`
# from the browserbase/stagehand GitHub releases. The two are
# LOOSELY MATCHED — both stay on the v3.x line and remain protocol-
# compatible, but the version numbers do NOT track each other (Go
# SDK is at v3.21.0, server binary is at v3.7.2 today). On every
# go.mod bump, refresh the server binary pin in `download_deps.py`
# to the current latest server release; no version correspondence
# is required to maintain.
#
# Drift on the Go SDK pin (this ARG vs go.mod) forces a fresh
# GitHub download at process boot — a hard failure in air-gapped
# deployments. CI cross-checks the two values.
#
# The binary is pre-fetched by `download_deps.py` and shipped via
# the ragflow_deps image, then written directly to the stagehand-go
# cache path that `local.go:cacheDir()` constructs at runtime —
# `/root/.cache/stagehand/lib/go_<ver>/stagehand-server-v3-linux-<arch>`.
# Go module version of stagehand-go; the leading "v" is stripped below to
# form the go_<ver> cache directory name.
ARG STAGEHAND_GO_VERSION=v3.21.0
# Map the machine architecture to the binary's arch suffix (failing on
# anything other than x86_64 / aarch64 / arm64), then copy the pre-fetched
# server binary from the deps image into the cache directory and mark it
# executable.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    set -eux; \
    arch="$(uname -m)"; \
    case "$arch" in \
        x86_64) stagehand_arch=x64 ;; \
        aarch64|arm64) stagehand_arch=arm64 ;; \
        *) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \
    esac; \
    stagehand_version="${STAGEHAND_GO_VERSION#v}"; \
    stagehand_cache_dir="/root/.cache/stagehand/lib/go_${stagehand_version}"; \
    stagehand_bin="stagehand-server-v3-linux-${stagehand_arch}"; \
    mkdir -p "${stagehand_cache_dir}"; \
    cp "/deps/${stagehand_bin}" "${stagehand_cache_dir}/${stagehand_bin}"; \
    chmod +x "${stagehand_cache_dir}/${stagehand_bin}"
2024-12-17 17:44:51 +08:00
# Add mssql ODBC driver
# macOS ARM64 environment, install msodbcsql18.
# general x86_64 environment, install msodbcsql17.
2024-12-12 18:26:44 +07:00
# Register the Microsoft package repository and install the ODBC driver:
# msodbcsql18 on arm64/aarch64, msodbcsql17 on every other architecture.
# The whole sequence is a single && chain so that a failure in any step
# (key import, repo list download, apt update, install) reaches the error
# handler at the end; curl uses -f so an HTTP error page is never written
# into the apt sources list.
# NOTE(review): apt-key is deprecated, and the repo list is the ubuntu/22.04
# one while the base image is ubuntu:24.04 -- confirm both are still intended.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    curl -fsSL https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
    apt update && \
    arch="$(uname -m)" && \
    if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
        # ARM64 (macOS/Apple Silicon or Linux aarch64) \
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
    else \
        # x86_64 or others \
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
    fi && \
    rm -rf /var/lib/apt/lists/* || \
    { echo "Failed to install ODBC driver"; exit 1; }
2024-12-12 18:26:44 +07:00
2024-11-19 18:25:04 +08:00
2024-12-06 14:05:30 +08:00
# Add dependencies of selenium
# Unzip the Chrome archive bind-mounted from the deps image, move it to
# /opt/chrome and link the chrome binary into /usr/local/bin.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
    unzip /chrome-linux64.zip && \
    mv chrome-linux64 /opt/chrome && \
    ln -s /opt/chrome/chrome /usr/local/bin/
# Extract only the chromedriver executable (-j drops the archive's directory
# prefix), install it into /usr/local/bin and remove any
# /usr/bin/google-chrome entry.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
    mv chromedriver /usr/local/bin/ && \
    rm -f /usr/bin/google-chrome
# Install the libssl1.1 .deb shipped in the deps image for the current
# architecture. On architectures other than x86_64 / aarch64 nothing is
# installed and the step still succeeds.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    if [ "$(uname -m)" = "x86_64" ]; then \
        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
    elif [ "$(uname -m)" = "aarch64" ]; then \
        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
    fi
2024-09-29 10:51:46 +08:00
# builder stage
# Builder stage: starts from the shared base image and produces the Python
# virtualenv, the compiled frontend and the VERSION file; the production
# stage copies those artifacts out of it with COPY --from=builder.
FROM base AS builder
USER root
WORKDIR /ragflow
2026-06-10 11:44:22 +08:00
# Install build-only dependencies for compiling Python C extensions.
# These are not inherited from base to keep the production image smaller.
# Compiler toolchain and development headers, installed in the builder stage
# only; the package lists are removed in the same layer.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    apt update && \
    apt install -y build-essential libpython3-dev libicu-dev libgbm-dev && \
    rm -rf /var/lib/apt/lists/*
2025-01-14 11:49:43 +08:00
# install dependencies from uv.lock file
COPY pyproject.toml uv.lock ./
2024-11-29 13:37:50 +08:00
2025-01-17 12:01:04 +08:00
# https://github.com/astral-sh/uv/issues/10462
# uv records index url into uv.lock but doesn't failover among multiple indexes
feat(agent): Go port — canvas engine, 22 components, DSL v2, 13 endpoints (#15952)
Ports the agent canvas subsystem from Python to Go.
## What's included
### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages
### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |
### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7
### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)
### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs
### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
2026-06-12 22:58:28 +08:00
# Also rewrite pypi.tuna.tsinghua.edu.cn to mirrors.aliyun.com/pypi so locks
# that were resolved against the Tsinghua mirror (e.g. when UV_INDEX pointed
# there) get normalized to the Aliyun mirror in NEED_MIRROR=1 builds. Without
# this, stale Tsinghua URLs slip through and `uv sync --frozen` 404s on
# packages that the Tsinghua mirror no longer carries.
2025-01-14 11:49:43 +08:00
# Normalize the index hosts recorded in uv.lock to match the build mode
# (Aliyun mirror when NEED_MIRROR=1; pypi.org and github.com otherwise),
# then create the virtualenv from the frozen lock file.
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \
        sed -i 's|pypi.tuna.tsinghua.edu.cn|mirrors.aliyun.com/pypi|g' uv.lock; \
    else \
        sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \
        sed -i 's|pypi.tuna.tsinghua.edu.cn|pypi.org|g' uv.lock; \
        sed -i 's|gitee.com|github.com|g' uv.lock; \
    fi; \
    # --refresh-package litellm forces a re-download of litellm from the
    # (post-sed) URLs in uv.lock even if BuildKit's persistent uv cache mount
    # holds a stale wheel from a previous build. litellm 1.88.x has had
    # multiple internal ImportError issues (1.88.1 missing
    # DEFAULT_HEALTH_CHECK_STALENESS_MULTIPLIER, 1.88.0 wheel pulled via
    # some proxies missing RedisPipelineLpopOperation) — always re-fetching
    # the locked version avoids serving a half-broken cached copy.
    uv sync --python 3.13 --frozen --refresh-package litellm && \
    # Ensure pip is available in the venv for runtime package installation (fixes #12651)
    .venv/bin/python3 -m ensurepip --upgrade
2024-11-29 13:37:50 +08:00
2026-06-10 11:44:22 +08:00
# Install frontend dependencies — depends only on package manifests so
# web source / docs changes don't invalidate this layer.
# Only the package manifests are copied before installing, so this layer is
# reused until one of them changes. NODE_OPTIONS raises Node's old-space
# heap limit to 8 GiB for the install.
COPY web/package.json web/package-lock.json web/.npmrc ./web/
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
    cd web && NODE_OPTIONS="--max-old-space-size=8192" npm install
# Copy full web source and docs for the frontend build.
2024-11-29 13:37:50 +08:00
COPY web web
COPY docs docs
# Build the frontend with the build script's VITE_BUILD_SOURCEMAP=false and
# VITE_MINIFY=esbuild settings and an 8 GiB Node heap limit.
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
    cd web && NODE_OPTIONS="--max-old-space-size=8192" VITE_BUILD_SOURCEMAP=false VITE_MINIFY=esbuild npm run build
2024-11-29 13:37:50 +08:00
2024-11-22 20:24:32 +08:00
COPY .git /ragflow/.git
2024-12-07 16:56:34 +08:00
# Derive the version string from the nearest v* tag on the first-parent
# history (falling back to the abbreviated commit hash via --always), print
# it to the build log and store it in /ragflow/VERSION. The pattern and the
# expansions are quoted so the shell passes them through unchanged.
RUN version_info="$(git describe --tags --match='v*' --first-parent --always)"; \
    echo "RAGFlow version: $version_info"; \
    echo "$version_info" > /ragflow/VERSION
2024-09-29 10:51:46 +08:00
# production stage
# Production stage: derived from base rather than builder, so packages
# installed only in the builder stage are not part of the final image.
# NOTE(review): no later USER instruction is visible, so the container runs
# as root -- confirm this is intended.
FROM base AS production
USER root
WORKDIR /ragflow
2024-11-29 13:37:50 +08:00
# Copy Python environment and packages
# Bring the virtualenv built in the builder stage to the same path in this
# image and put its bin directory first on PATH.
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Make the packages copied into /ragflow importable.
ENV PYTHONPATH=/ragflow/
2025-10-13 19:05:54 +08:00
COPY admin admin
2024-11-29 13:37:50 +08:00
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
2025-01-14 11:49:43 +08:00
COPY pyproject.toml uv.lock ./
2025-04-21 09:43:20 +08:00
COPY mcp mcp
2025-10-28 09:46:32 +08:00
COPY common common
2025-12-25 21:18:13 +08:00
COPY memory memory
2026-03-10 17:31:20 +08:00
COPY bin bin
2026-05-29 17:39:41 +08:00
COPY tools/scripts tools/scripts
2024-11-29 13:37:50 +08:00
2024-11-12 15:56:53 +01:00
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
2025-03-28 01:39:34 -03:00
COPY docker/entrypoint.sh ./
2025-01-20 22:49:46 +08:00
RUN chmod +x ./entrypoint*.sh
2024-04-16 12:29:58 +08:00
2026-03-19 10:25:35 +08:00
# Copy nginx configuration for frontend serving
# Copy all nginx configuration files into /etc/nginx first; nginx.conf and
# proxy.conf stay there.
COPY docker/nginx/ragflow.conf.golang \
     docker/nginx/ragflow.conf.python \
     docker/nginx/ragflow.conf.hybrid \
     docker/nginx/nginx.conf \
     docker/nginx/proxy.conf \
     /etc/nginx/
# Relocate the three ragflow.conf.* variants into conf.d (a single mv into
# the existing directory keeps their file names) and drop the distro default
# site if it is present.
RUN mv /etc/nginx/ragflow.conf.golang \
       /etc/nginx/ragflow.conf.python \
       /etc/nginx/ragflow.conf.hybrid \
       /etc/nginx/conf.d/ && \
    rm -f /etc/nginx/sites-enabled/default
2024-11-29 13:37:50 +08:00
# Copy compiled web pages
# Frontend bundle and VERSION file, both produced in the builder stage.
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
COPY --from=builder /ragflow/VERSION /ragflow/VERSION
2024-09-29 10:51:46 +08:00
ENTRYPOINT [ "./entrypoint.sh" ]