# Core package metadata (PEP 621 `[project]` table).
[project]
name = "ragflow"
version = "0.24.0"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
# Supported interpreters: 3.12 up to, but not including, 3.15.
requires-python = ">=3.12,<3.15"
# Runtime dependencies, one PEP 508 requirement string per line.
# Entries that are commented out are kept on purpose as a record of
# packages that were dropped or temporarily disabled.
dependencies = [
    "aiosmtplib>=5.0.0",
    "akshare>=1.15.78,<2.0.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    "atlassian-python-api==4.0.7",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "beartype>=0.20.0,<1.0.0",
    "bio==1.7.1",
    "boxsdk>=10.1.0",
    "captcha>=0.7.1",
    "chardet>=5.2.0,<6.0.0",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI>=0.8.0,<1.0.0",
    "dashscope==1.25.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "dropbox==12.0.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elasticsearch-dsl==8.12.0",
    "exceptiongroup>=1.3.0,<2.0.0",
    "extract-msg>=0.39.0",
    "ffmpeg-python>=0.2.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "flask-cors==6.0.2",
    "flask-login==0.6.3",
    "flask-mail>=0.10.0",
    "flask-session==0.8.0",
    "google-api-python-client>=2.190.0,<3.0.0",
    "google-auth-oauthlib>=1.2.0,<2.0.0",
    "google-cloud-storage>=2.19.0,<3.0.0",
    "google-genai>=1.41.0,<2.0.0",
    "google-search-results==2.4.2",
    # Installed from a pinned commit of the infiniflow fork rather than from the package index.
    "graspologic @ git+https://gitee.com/infiniflow/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
    "groq==0.9.0",
    "grpcio-status==1.67.1",
    "html-text==0.6.2",
    "infinity-sdk==0.7.0-dev2",
    "infinity-emb>=0.0.66,<0.0.67",
    "jira==3.10.5",
    "json-repair==0.35.0",
    "langfuse>=2.60.0",
    "mammoth>=1.11.0",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "markdownify>=1.2.0",
    "mcp>=1.23.0",
    "mini-racer>=0.12.4,<0.13.0",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mysql-connector-python>=9.0.0,<10.0.0",
    "moodlepy>=0.23.0",
    "mypy-boto3-s3==1.40.26",
    "Office365-REST-Python-Client==2.6.2",
    "ollama>=0.5.0",
    # The two environment markers below are exact complements: the GPU build is
    # selected only on non-macOS x86_64, the CPU build everywhere else.
    "onnxruntime==1.23.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.23.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "opendal>=0.45.0,<0.46.0",
    "opensearch-py==2.7.1",
    "ormsgpack==1.5.0",
    "pdfplumber==0.10.4",
    "pluginlib==0.9.4",
    "psycopg2-binary>=2.9.11,<3.0.0",
    "pyclipper>=1.4.0,<2.0.0",
    # "pywencai>=0.13.1,<1.0.0", # Temporarily disabled: conflicts with agentrun-sdk (pydash>=8), needed for agent/tools/wencai.py
    "pycryptodomex==3.20.0",
    "pyobvector==0.2.22",
    "pyodbc>=5.2.0,<6.0.0",
    "pypandoc>=1.16",
    "pypdf>=6.8.0",
    "python-calamine>=0.4.0",
    "python-docx>=1.1.2,<2.0.0",
    "python-pptx>=1.0.2,<2.0.0",
    # "pywencai>=0.13.1,<1.0.0", # Temporarily disabled: conflicts with agentrun-sdk (pydash>=8), needed for agent/tools/wencai.py
    "qianfan==0.4.6",
    "quart-auth==0.11.0",
    "quart-cors==0.8.0",
    "ranx==0.3.20",
    "readability-lxml>=0.8.4,<1.0.0",
    "replicate==0.31.0",
    "reportlab>=4.4.1",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "scholarly==1.7.11",
    "selenium-wire==5.1.0",
    "slack-sdk==3.37.0",
    "socksio==1.0.0",
    "agentrun-sdk>=0.0.16,<1.0.0",
    "nest-asyncio>=1.6.0,<2.0.0", # Needed for agent/component/message.py
    "sqlglotrs==0.9.0",
    "strenum==0.4.15",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1478",
    "tika==2.6.0",
    "valkey==6.0.2",
    "volcengine==1.0.194",
    "voyageai==0.2.3",
    "webdav4>=0.10.0,<0.11.0",
    "webdriver-manager==4.0.1",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.6.0",
    "xpinyin==0.7.6",
    "yfinance==0.2.65",
    "zhipuai==2.0.1",
    "peewee>=3.17.1,<4.0.0",
    # The following modules aren't necessary
    # "nltk==3.9.1",
    # "numpy>=1.26.0,<2.0.0",
    # "openai>=1.45.0",
    # "openpyxl>=3.1.0,<4.0.0",
    # "pandas>=2.2.0,<3.0.0",
    # "pillow>=10.4.0,<13.0.0",
    # "protobuf==5.27.2",
    # "pymysql>=1.1.1,<2.0.0",
    # "python-dotenv==1.0.1",
    # "python-dateutil==2.8.2",
    # "Quart==0.20.0",
    # "requests>=2.32.3,<3.0.0",
    # "scikit-learn==1.5.0",
    # "selenium==4.22.0",
    # "setuptools>=78.1.1,<81.0.0",
    # "shapely==2.0.5",
    # "six==1.16.0",
    # "tabulate==0.9.0",
    # "tiktoken==0.7.0",
    # "umap_learn==0.5.6",
    # "werkzeug==3.0.6",
    # "xxhash>=3.5.0,<4.0.0",
    # "trio>=0.17.0,<0.29.0",
    # "debugpy>=1.8.13",
    # "click>=8.1.8",
    # "litellm>=1.74.15.post1",
    # "lark>=1.2.2",
    # "pip>=25.2",
    # "imageio-ffmpeg>=0.6.0",
    # "cryptography==46.0.3",
    # "jinja2>=3.1.0",
    "pyairtable>=3.3.0",
    "pygithub>=2.8.1",
    "asana>=5.2.2",
    "python-gitlab>=7.0.0",
    "alibabacloud-dingtalk>=2.0.0",
    "quart-schema==0.23.0",
]
# Dependency groups; `test` lists the packages needed by the test suite.
[dependency-groups]
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0,<13.0.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
    "pycryptodomex==3.20.0",
    "pytest-playwright>=0.7.2",
    "codecov>=2.1.13",
]
# Package index entry for uv (Tsinghua University PyPI mirror).
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
# Explicit list of packages handed to setuptools.
# NOTE(review): 'intergrations' is kept exactly as spelled; it is a package
# path, so it must match the directory name on disk - confirm before renaming.
[tool.setuptools]
packages = [
    'agent',
    'api',
    'deepdoc',
    'graphrag',
    'intergrations.chatgpt-on-wechat.plugins',
    'mcp.server',
    'rag',
    'sdk.python.ragflow_sdk',
]
# Ruff: allow long lines and skip the virtualenv plus one excluded script.
[tool.ruff]
line-length = 200
exclude = [".venv", "rag/svr/discord_svr.py"]
# Ruff lint rules: enable the ASYNC rule family on top of the defaults and
# ignore E402.
[tool.ruff.lint]
extend-select = ["ASYNC", "ASYNC1"]
ignore = ["E402"]
# pytest configuration.
[tool.pytest.ini_options]
# "." is added to sys.path so tests can import the project packages.
pythonpath = ["."]
# Test discovery: where to look and which names count as tests.
testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Every marker used in the suite must be declared here because
# `--strict-markers` is passed in `addopts` below.
markers = [
    "p0: critical priority test cases",
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
    "smoke: smoke test cases",
    "auth: authentication UI tests",
    "asyncio: mark test as async",
]
# Test collection and runtime configuration
filterwarnings = [
    "error", # Treat warnings as errors
    "ignore::DeprecationWarning", # ...except DeprecationWarning, which is ignored
]
# Command line options
addopts = [
    "-v", # Verbose output
    "--strict-markers", # Enforce marker definitions
    "--tb=short", # Simplified traceback
    "--disable-warnings", # Disable warnings
    "--color=yes", # Colored output
]
# Coverage configuration
[tool.coverage.run]
# Directories whose code is measured - adjust to the project layout.
source = [
    # "../../api/db/services",
    # Further directories can be listed here:
    "../../common",
    # "../../utils",
]
# Path patterns left out of measurement.
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py",
]
[tool.coverage.report]
# Report configuration
precision = 2
show_missing = true
skip_covered = false
fail_under = 0 # Minimum coverage requirement (0-100)
# Lines to exclude from the report. Each entry is a regular expression that
# is searched for anywhere in a source line, so a bare "pass" would also hide
# lines such as `password = ...`; the pattern is anchored to match only a
# standalone `pass` statement.
# Setting this option replaces coverage.py's built-in exclusion list, so
# "pragma: no cover" is not honoured while its entry stays commented out.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    '^\s*pass\s*$',
]
[tool.coverage.html]
# HTML report configuration: output directory and page title.
directory = "htmlcov"
title = "Test Coverage Report"
# extra_css = "custom.css" # Optional custom CSS