diff --git a/agent/sandbox/providers/local.py b/agent/sandbox/providers/local.py index b8057fa5b4..1a82516dcf 100644 --- a/agent/sandbox/providers/local.py +++ b/agent/sandbox/providers/local.py @@ -41,6 +41,15 @@ ALLOWED_ARTIFACT_EXTENSIONS = { ".svg", } +LOCAL_PYTHON_THREAD_ENV_VARS = ( + "OPENBLAS_NUM_THREADS", + "OMP_NUM_THREADS", + "MKL_NUM_THREADS", + "NUMEXPR_NUM_THREADS", + "BLIS_NUM_THREADS", + "VECLIB_MAXIMUM_THREADS", +) + def _env_enabled(name: str) -> bool: return os.environ.get(name, "").strip().lower() in {"1", "true", "yes", "on"} @@ -226,13 +235,18 @@ class LocalProvider(SandboxProvider): return os.environ.get(env_name, default) def _build_child_env(self, instance_dir: Path) -> dict[str, str]: - return { + env = { "HOME": str(instance_dir), "MPLBACKEND": "Agg", "PATH": os.environ.get("PATH", ""), "PYTHONUNBUFFERED": "1", "TMPDIR": str(instance_dir), } + for name in LOCAL_PYTHON_THREAD_ENV_VARS: + value = os.environ.get(name) + if value is not None: + env[name] = value + return env def _limit_child_process(self) -> None: import resource diff --git a/docker/.env b/docker/.env index da46928795..5852383507 100644 --- a/docker/.env +++ b/docker/.env @@ -305,6 +305,14 @@ REGISTER_ENABLED=1 # SANDBOX_LOCAL_MAX_OUTPUT_BYTES=1048576 # SANDBOX_LOCAL_MAX_ARTIFACTS=20 # SANDBOX_LOCAL_MAX_ARTIFACT_BYTES=10485760 +# Limit native math library threads for local Python subprocesses if NumPy or +# OpenBLAS fails with `pthread_create failed` under tight thread limits. +# OPENBLAS_NUM_THREADS=1 +# OMP_NUM_THREADS=1 +# MKL_NUM_THREADS=1 +# NUMEXPR_NUM_THREADS=1 +# BLIS_NUM_THREADS=1 +# VECLIB_MAXIMUM_THREADS=1 # Enable DocLing USE_DOCLING=false diff --git a/docs/guides/agent/agent_quickstarts/sandbox_quickstart.md b/docs/guides/agent/agent_quickstarts/sandbox_quickstart.md index 115ffe8882..eff2aaa648 100644 --- a/docs/guides/agent/agent_quickstarts/sandbox_quickstart.md +++ b/docs/guides/agent/agent_quickstarts/sandbox_quickstart.md @@ -9,6 +9,8 @@ sidebar_custom_props: { A secure, pluggable code execution backend designed for RAGFlow and other applications requiring isolated code execution environments. +RAGFlow's `CodeExec` agent component depends on a sandbox provider to run Python and JavaScript code. Configure one of the providers below before using `CodeExec`. + ## Features: - Seamless RAGFlow Integration — Works out-of-the-box with the code component of RAGFlow. @@ -21,6 +23,13 @@ A secure, pluggable code execution backend designed for RAGFlow and other applic The architecture consists of isolated Docker base images for each supported language runtime, managed by the executor manager service. The executor manager orchestrates sandboxed code execution using gVisor for syscall interception and optional seccomp profiles for enhanced syscall filtering. +## Provider options + +RAGFlow supports two sandbox provider types: + +- `self_managed`: Runs code inside Docker-managed sandbox containers. Use this for the standard RAGFlow sandbox deployment. +- `local`: Runs code as local Python or Node.js subprocesses. Use this only in trusted development environments. + ## Prerequisites - Linux distribution compatible with gVisor. @@ -31,14 +40,16 @@ The architecture consists of isolated Docker base images for each supported lang - (Optional) GNU Make for simplified command-line management. :::tip NOTE -The error message `client version 1.43 is too old. Minimum supported API version is 1.44` indicates that your executor manager image's built-in Docker CLI version is lower than `29.1.0` required by the Docker daemon in use. To solve this issue, pull the latest `infiniflow/sandbox-executor-manager:latest` from Docker Hub or rebuild it in `./sandbox/executor_manager`. +The error message `client version 1.43 is too old. Minimum supported API version is 1.44` indicates that your executor manager image's built-in Docker CLI version is lower than `29.1.0` required by the Docker daemon in use. ::: ## Build Docker base images -The sandbox uses isolated base images for secure containerised execution environments. +The sandbox uses isolated base images for secure containerized execution environments. -Build the base images manually: +### Option 1: Build from source + +Build the runtime base images: ```bash docker build -t sandbox-base-python:latest ./sandbox_base_image/python @@ -51,20 +62,41 @@ Alternatively, build all base images at once using the Makefile: make build ``` -Next, build the executor manager image: +Build the executor manager image: ```bash docker build -t sandbox-executor-manager:latest ./executor_manager ``` +### Option 2: Pull base images from Docker Hub + +If you do not need to customize runtime dependencies, pull the published base images and tag them with the names used by standalone Docker Compose: + +```bash +docker pull infiniflow/sandbox-base-python:latest +docker pull infiniflow/sandbox-base-nodejs:latest + +docker tag infiniflow/sandbox-base-python:latest sandbox-base-python:latest +docker tag infiniflow/sandbox-base-nodejs:latest sandbox-base-nodejs:latest +``` + +Then restart the standalone sandbox services: + +```bash +docker compose -f docker-compose.yml down +docker compose -f docker-compose.yml up -d +``` + ## Running with RAGFlow 1. Verify that gVisor is properly installed and operational. 2. Configure the .env file located at docker/.env: -- Uncomment sandbox-related environment variables. -- Enable the sandbox profile at the bottom of the file. +- Set `SANDBOX_ENABLED=1`. +- Set `SANDBOX_PROVIDER_TYPE=self_managed` or `SANDBOX_PROVIDER_TYPE=local`. +- For `self_managed`, include `sandbox` in `COMPOSE_PROFILES`. +- For `local`, uncomment and adjust the `SANDBOX_LOCAL_*` variables. 3. Add the following entry to your /etc/hosts file to resolve the executor manager service: @@ -74,6 +106,49 @@ docker build -t sandbox-executor-manager:latest ./executor_manager 4. Start the RAGFlow service as usual. +## Environment variables + +The variables in `docker/.env` are grouped by scope. + +### Shared variables + +These variables apply to sandbox support in general: + +- `SANDBOX_ENABLED`: Enables sandbox support in RAGFlow. +- `SANDBOX_PROVIDER_TYPE`: Selects the active provider. Supported values are `self_managed` and `local`. +- `SANDBOX_HOST`: The executor manager host used by the self-managed provider and the legacy HTTP fallback. +- `SANDBOX_ARTIFACT_BUCKET`: MinIO bucket used for files generated by sandbox code. +- `SANDBOX_ARTIFACT_EXPIRE_DAYS`: Number of days before sandbox artifacts expire. + +### Self-managed variables + +These variables apply when `SANDBOX_PROVIDER_TYPE=self_managed`: + +- `COMPOSE_PROFILES`: Must include `sandbox` to start `sandbox-executor-manager` with RAGFlow. +- `SANDBOX_EXECUTOR_MANAGER_IMAGE`: Docker image for the executor manager service. +- `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE`: Number of Python and Node.js sandbox containers kept in the pool. +- `SANDBOX_BASE_PYTHON_IMAGE`: Python runtime image used by executor-managed containers. +- `SANDBOX_BASE_NODEJS_IMAGE`: Node.js runtime image used by executor-managed containers. +- `SANDBOX_EXECUTOR_MANAGER_PORT`: Host port exposed by the executor manager. +- `SANDBOX_ENABLE_SECCOMP`: Enables the optional seccomp profile for sandbox containers. +- `SANDBOX_MAX_MEMORY`: Memory limit for each sandbox runtime container. +- `SANDBOX_TIMEOUT`: Default execution timeout. + +### Local variables + +These variables apply when `SANDBOX_PROVIDER_TYPE=local`: + +- `SANDBOX_LOCAL_ENABLED`: Explicitly enables local code execution. +- `SANDBOX_LOCAL_PYTHON_BIN`: Python executable used by local execution. +- `SANDBOX_LOCAL_NODE_BIN`: Node.js executable used by local execution. +- `SANDBOX_LOCAL_WORK_DIR`: Working directory for local execution files and artifacts. +- `SANDBOX_LOCAL_TIMEOUT`: Maximum local execution time in seconds. +- `SANDBOX_LOCAL_MAX_MEMORY_MB`: Address-space memory limit for local child processes. +- `SANDBOX_LOCAL_MAX_OUTPUT_BYTES`: Maximum stdout and stderr size. +- `SANDBOX_LOCAL_MAX_ARTIFACTS`: Maximum number of artifacts collected after execution. +- `SANDBOX_LOCAL_MAX_ARTIFACT_BYTES`: Maximum size for each artifact. +- `OPENBLAS_NUM_THREADS`, `OMP_NUM_THREADS`, `MKL_NUM_THREADS`, `NUMEXPR_NUM_THREADS`, `BLIS_NUM_THREADS`, `VECLIB_MAXIMUM_THREADS`: Optional native math library thread limits for local Python subprocesses. + ## Running standalone ### Manual setup diff --git a/web/src/pages/agent/form-sheet/next.tsx b/web/src/pages/agent/form-sheet/next.tsx index 30c87d0551..245c680947 100644 --- a/web/src/pages/agent/form-sheet/next.tsx +++ b/web/src/pages/agent/form-sheet/next.tsx @@ -10,7 +10,7 @@ import { IModalProps } from '@/interfaces/common'; import { RAGFlowNodeType } from '@/interfaces/database/agent'; import { cn } from '@/lib/utils'; import { lowerFirst } from 'lodash'; -import { CirclePlay, X } from 'lucide-react'; +import { ArrowUpRight, CirclePlay, X } from 'lucide-react'; import { Operator } from '../constant'; import { AgentFormContext } from '../context'; import { RunTooltip } from '../flow-tooltip'; @@ -31,6 +31,8 @@ interface IProps { } const EmptyContent = () =>
; +const SandboxQuickstartUrl = + 'https://github.com/infiniflow/ragflow/blob/main/docs/guides/agent/agent_quickstarts/sandbox_quickstart.md'; const FormSheet = ({ visible, @@ -100,6 +102,24 @@ const FormSheet = ({ {t( `${lowerFirst(operatorName === Operator.Tool ? toolComponentName : operatorName)}Description`, )} + {operatorName === Operator.Code && ( + + )}

)}