2026-04-07 15:08:11 +08:00
|
|
|
// Base path segment `/v1`; made available to other modules as `webAPI`.
const webAPI = '/v1';
|
|
|
|
|
// Base path segment `/api/v1`; every endpoint entry in this file is built on top of it.
const restAPIv1 = '/api/v1';
|
2024-08-01 17:27:27 +08:00
|
|
|
|
2026-04-07 15:08:11 +08:00
|
|
|
// Expose both base path segments as named exports alongside the default endpoint map.
export { restAPIv1, webAPI };
|
2024-08-01 17:27:27 +08:00
|
|
|
|
|
|
|
|
export default {
|
|
|
|
|
// user
|
2026-04-24 10:25:15 +08:00
|
|
|
login: `${restAPIv1}/auth/login`,
|
|
|
|
|
logout: `${restAPIv1}/auth/logout`,
|
|
|
|
|
register: `${restAPIv1}/users`,
|
|
|
|
|
// Resolves to `/users/me` under the REST prefix — the exact same path as `userInfo`.
// NOTE(review): presumably the two keys differ only by HTTP method at the call site — confirm.
setting: `${restAPIv1}/users/me`,
|
|
|
|
|
userInfo: `${restAPIv1}/users/me`,
|
|
|
|
|
tenantInfo: `${restAPIv1}/users/me/models`,
|
|
|
|
|
loginChannels: `${restAPIv1}/auth/login/channels`,
|
|
|
|
|
// Builds the login path for a single channel by appending `channel` to `/auth/login/`.
// `channel` is interpolated as-is; no URL encoding is applied here.
loginChannel: (channel: string) => `${restAPIv1}/auth/login/${channel}`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
2024-10-18 09:21:01 +08:00
|
|
|
// team
|
2026-04-22 20:00:32 +08:00
|
|
|
addTenantUser: (tenantId: string) => `${restAPIv1}/tenants/${tenantId}/users`,
|
2024-10-18 09:21:01 +08:00
|
|
|
listTenantUser: (tenantId: string) =>
|
2026-04-22 20:00:32 +08:00
|
|
|
`${restAPIv1}/tenants/${tenantId}/users`,
|
|
|
|
|
deleteTenantUser: (tenantId: string) =>
|
|
|
|
|
`${restAPIv1}/tenants/${tenantId}/users`,
|
|
|
|
|
listTenant: `${restAPIv1}/tenants`,
|
|
|
|
|
agreeTenant: (tenantId: string) => `${restAPIv1}/tenants/${tenantId}`,
|
2024-10-18 09:21:01 +08:00
|
|
|
|
2024-08-01 17:27:27 +08:00
|
|
|
// llm model
|
2026-05-29 17:39:41 +08:00
|
|
|
listAllAddedModels: `${restAPIv1}/models`,
|
|
|
|
|
defaultModel: `${restAPIv1}/models/default`,
|
|
|
|
|
listProviders: `${restAPIv1}/providers`,
|
|
|
|
|
// NOTE(review): unlike `listProviders`, this path ends with a trailing slash (`/providers/`) —
// confirm that the backend route expects it.
addProvider: `${restAPIv1}/providers/`,
|
|
|
|
|
addProviderInstance: ({ llm_factory }: { llm_factory: string }) =>
|
|
|
|
|
`${restAPIv1}/providers/${llm_factory}/instances`,
|
2026-06-03 11:59:57 +08:00
|
|
|
verifyProviderConnection: ({ provider_name }: { provider_name: string }) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/connection`,
|
2026-06-08 16:46:52 +08:00
|
|
|
listProviderModels: ({ provider_name }: { provider_name: string }) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/models`,
|
2026-05-29 17:39:41 +08:00
|
|
|
listProviderInstances: ({ provider_name }: { provider_name: string }) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/instances`,
|
|
|
|
|
listInstanceModels: ({
|
|
|
|
|
provider_name,
|
|
|
|
|
instance_name,
|
|
|
|
|
}: {
|
|
|
|
|
provider_name: string;
|
|
|
|
|
instance_name: string;
|
|
|
|
|
}) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/instances/${instance_name}/models`,
|
|
|
|
|
showProviderInstance: ({
|
|
|
|
|
provider_name,
|
|
|
|
|
instance_name,
|
|
|
|
|
}: {
|
|
|
|
|
provider_name: string;
|
|
|
|
|
instance_name: string;
|
|
|
|
|
}) => `${restAPIv1}/providers/${provider_name}/instances/${instance_name}`,
|
|
|
|
|
addInstanceModel: ({
|
|
|
|
|
provider_name,
|
|
|
|
|
instance_name,
|
|
|
|
|
}: {
|
|
|
|
|
provider_name: string;
|
|
|
|
|
instance_name: string;
|
|
|
|
|
}) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/instances/${instance_name}/models`,
|
2026-06-15 19:11:05 +08:00
|
|
|
editInstanceModel: ({
|
|
|
|
|
provider_name,
|
|
|
|
|
instance_name,
|
|
|
|
|
}: {
|
|
|
|
|
provider_name: string;
|
|
|
|
|
instance_name: string;
|
|
|
|
|
}) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/instances/${instance_name}/models`,
|
2026-05-29 17:39:41 +08:00
|
|
|
deleteProviderInstance: ({ provider_name }: { provider_name: string }) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/instances`,
|
|
|
|
|
updateModelStatus: ({
|
|
|
|
|
provider_name,
|
|
|
|
|
instance_name,
|
|
|
|
|
model_name,
|
|
|
|
|
}: {
|
|
|
|
|
provider_name: string;
|
|
|
|
|
instance_name: string;
|
|
|
|
|
model_name: string;
|
|
|
|
|
}) =>
|
|
|
|
|
`${restAPIv1}/providers/${provider_name}/instances/${instance_name}/models/${model_name}`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
2025-11-06 11:53:46 +08:00
|
|
|
// data source
|
2026-04-23 11:40:45 +08:00
|
|
|
// Path of a single connector, `/connectors/{id}`. `dataSourceDel` and `dataSourceDetail`
// build the identical path. `id` is interpolated without URL encoding.
dataSourceUpdate: (id: string) => `${restAPIv1}/connectors/${id}`,
|
2026-04-22 20:42:41 +08:00
|
|
|
dataSourceSet: `${restAPIv1}/connectors`,
|
|
|
|
|
dataSourceList: `${restAPIv1}/connectors`,
|
|
|
|
|
dataSourceDel: (id: string) => `${restAPIv1}/connectors/${id}`,
|
|
|
|
|
dataSourceRebuild: (id: string) => `${restAPIv1}/connectors/${id}/rebuild`,
|
|
|
|
|
dataSourceLogs: (id: string) => `${restAPIv1}/connectors/${id}/logs`,
|
|
|
|
|
dataSourceDetail: (id: string) => `${restAPIv1}/connectors/${id}`,
|
Feature/generic api connector (#13545)
# feat: Add Generic REST API Connector
## What problem does this PR solve?
RAGFlow supports many specific data source connectors (MySQL, Slack,
Google Drive, etc.), but there was no way to connect an arbitrary REST
API as a data source. Users with custom or third-party APIs had to write
a new connector class for each one.
This PR adds a **generic, configuration-driven REST API connector** that
lets users connect any REST API as a data source entirely through the UI
— no code changes needed per API.
---
## Features
### Core Connector (`common/data_source/rest_api_connector.py`)
- Implements `LoadConnector` and `PollConnector` interfaces for full and
incremental sync
- **Configurable authentication:** None, API Key (custom header), Bearer
Token, Basic Auth
- **Pluggable pagination:** Page-based, Offset-based, Cursor-based, or
None
- Smart page-size inference from user's query parameters to avoid
duplicate/conflicting params
- Configurable request delay between pages to prevent API rate limiting
- Auto-detection of the items array in JSON responses (`items`,
`results`, `data`, `records`, or first list found)
- **Advanced field mapping** with dot-notation (`country.name`), array
wildcards (`newsType[*].name`), type hints, and default values
- Optional content template rendering (`"Title: {title}\nBody: {body}"`)
- HTML stripping for content fields
- Stable document IDs via `hash128` from a configurable ID field or
auto-generated from item content
- Pydantic configuration schema with automatic coercion of UI string
inputs to dicts/lists
### Backend Registration (`rag/svr/sync_data_source.py`,
`common/constants.py`, `common/data_source/config.py`)
- `REST_API` sync class wired into RAGFlow's `func_factory`
- Full sync (`load_from_state`) and incremental polling (`poll_source`)
support
- Credentials and config passed from task to connector following
existing patterns (MySQL, SeaFile, etc.)
### Test Connection Endpoint (`api/apps/connector_app.py`)
- `POST /v1/connector/<id>/test` validates config schema,
authentication, and API connectivity without triggering a sync
- Clear error messages for auth failures vs. config issues
### Frontend UI (`web/src/pages/user-setting/data-source/constant/`)
- **Postman-style configuration:** Base URL, Query Parameters (key=value
per line), Auth, Content Fields, Metadata Fields, Pagination Type
- Auth-type-aware form: fields for API key header/value, Bearer token,
or Basic username/password appear only when relevant
- **Advanced Settings** toggle for: Custom Headers, Max Pages, Request
Delay, Poll Timestamp Field, Request Body (POST)
- Connector icon (SVG) and i18n strings (English)
- **"Test Connection"** button to validate before syncing
---
## Controls & Safety
- Configurable max pages safety cap (default: 1000, adjustable in UI)
- Configurable request delay between pages (default: 0.5s, adjustable in
UI)
- Auth errors (401/403) fail immediately without retries; transient
errors retry with exponential backoff
- Diagnostic logging: auth setup confirmation, request details on
failure, content field extraction status
---
## Type of change
- [x] New Feature (non-breaking change which adds functionality)
## Visual Screenshots of Features
<img width="482" height="510" alt="Screenshot 2026-03-11 at 5 19 52 PM"
src="https://github.com/user-attachments/assets/dcb7ab4a-1622-44f3-bb02-d6f0527314c4"
/>
(Connector can be configured within the external data sources tab)
Configuration Parameters:
<img width="661" height="682" alt="Screenshot 2026-03-11 at 5 20 46 PM"
src="https://github.com/user-attachments/assets/5e154e71-4ab5-4872-bfb2-04f02b73c18a"
/>
<img width="661" height="682" alt="Screenshot 2026-03-11 at 5 20 54 PM"
src="https://github.com/user-attachments/assets/00cb14b7-0bcf-4b94-9d71-34e93369ecb2"
/>
Connection can be tested before attaching to dataset:
<img width="981" height="681" alt="Screenshot 2026-03-11 at 5 21 40 PM"
src="https://github.com/user-attachments/assets/aaa6eeeb-89a7-4349-bc34-2423bf8be9ee"
/>
Ingestion tested with API connector (works perfectly fine):
<img width="1062" height="705" alt="Screenshot 2026-03-11 at 5 22 30 PM"
src="https://github.com/user-attachments/assets/afcd0d58-cadd-4152-badc-d2f14d96fbec"
/>
Search & Retrieval works as well with metadata flow:
<img width="1062" height="705" alt="Screenshot 2026-03-11 at 5 23 05 PM"
src="https://github.com/user-attachments/assets/d41ee935-dcf7-4456-b317-22a76ca032c0"
/>
---------
Co-authored-by: Ahmad Intisar <ahmadintisar@Ahmads-MacBook-M4-Pro.local>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
2026-05-13 17:35:01 +05:00
|
|
|
dataSourceTest: (id: string) => `${restAPIv1}/connectors/${id}/test`,
|
2025-11-28 13:09:40 +08:00
|
|
|
googleWebAuthStart: (type: 'google-drive' | 'gmail') =>
|
2026-04-22 20:42:41 +08:00
|
|
|
`${restAPIv1}/connectors/google/oauth/web/start?type=${type}`,
|
2025-11-28 13:09:40 +08:00
|
|
|
googleWebAuthResult: (type: 'google-drive' | 'gmail') =>
|
2026-04-22 20:42:41 +08:00
|
|
|
`${restAPIv1}/connectors/google/oauth/web/result?type=${type}`,
|
|
|
|
|
boxWebAuthStart: () => `${restAPIv1}/connectors/box/oauth/web/start`,
|
|
|
|
|
boxWebAuthResult: () => `${restAPIv1}/connectors/box/oauth/web/result`,
|
2025-11-06 11:53:46 +08:00
|
|
|
|
Feat: chat channels — connect assistants to external messaging bots (#15850)
### What problem does this PR solve?
#15844
Adds a **Chat channels** capability so a RAGFlow assistant (Dialog) can
be exposed as a bot on external messaging platforms (Feishu/Lark,
Discord, Telegram, Slack, WeCom, LINE, etc.). An admin configures a bot
in the UI, connects it to an assistant, and inbound messages are
answered from that assistant's knowledge base — replies are delivered
back on the channel.
**Feishu/Lark is implemented and tested end-to-end.** Discord, Telegram,
LINE, and WeCom are scaffolded against the same interface; the remaining
listed channels are tracked as follow-ups.
### Design
**Backend**
- New `chat_channel` table (`tenant_id`, `name`, `channel`, `config`
JSON holding `{credential: {...}}`, `dialog_id`, `status`) +
`ChatChannelService` and RESTful CRUD under `/api/v1/chat_channels`.
- Channel framework under `api/channels/`: a `core` registry +
per-channel packages that self-register a builder and implement a common
`Channel` interface (`start`/`stop`/`send` + inbound normalization) over
`IncomingMessage`/`OutgoingMessage`.
- Embedded **reconcile loop** in `ragflow_server`
(`api/channels/bootstrap.py`): loads enabled bots, and
starts/stops/restarts them as rows change (no server restart needed).
Inbound messages run the connected dialog via the non-streaming
completion path, keeping per-end-user conversation history.
- Missing optional channel SDKs degrade gracefully (channel skipped with
a warning; others unaffected). Channel-level errors are logged, not
crashed.
- Feishu's WebSocket client runs in a dedicated thread with its own
event loop to avoid cross-loop/contextvars conflicts with the channel
runtime.
**Frontend**
- **Settings → Chat channels** panel: available-channels grid +
configured-bots list with add/edit/delete and a **Connect assistant**
popup that binds a bot to a dialog.
- Brand icons via simple-icons / reused shared data-source assets, with
colored fallbacks for brands not available.
- Route, sidebar entry, i18n (en/zh), and a top-nav segment-boundary fix
so the settings page no longer highlights the Chat tab.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### Notes
- DB: new `chat_channel` table is auto-created; `chat_channel.dialog_id`
is also covered by a `migrate_db` `alter_db_add_column` for existing
installs.
- Channel SDKs (`lark-oapi`, `discord.py`, `python-telegram-bot`,
`line-bot-sdk`, `wechatpy`, `aiohttp`) added to dependencies.
- Screenshots / per-channel credential docs to follow.
<img width="1338" height="1290" alt="Image"
src="https://github.com/user-attachments/assets/042cb2f9-0dad-4e6a-bcf7-43ced4bbd704"
/>
<img width="1344" height="738" alt="Image"
src="https://github.com/user-attachments/assets/373cd08e-ec40-4c67-9c51-4d948b1ba617"
/>
<img width="672" height="887" alt="Image"
src="https://github.com/user-attachments/assets/5a34953f-a9a3-4c1e-869e-5eff0dc64c84"
/>
---------
2026-06-12 18:21:30 +08:00
|
|
|
// chat channel
|
2026-06-16 19:15:43 +08:00
|
|
|
// Collection path `/chat-channels`; `chatChannelList` resolves to the same string.
chatChannelSet: `${restAPIv1}/chat-channels`,
|
|
|
|
|
chatChannelList: `${restAPIv1}/chat-channels`,
|
|
|
|
|
chatChannelDetail: (id: string) => `${restAPIv1}/chat-channels/${id}`,
|
|
|
|
|
chatChannelUpdate: (id: string) => `${restAPIv1}/chat-channels/${id}`,
|
|
|
|
|
chatChannelDel: (id: string) => `${restAPIv1}/chat-channels/${id}`,
|
2026-06-23 17:45:31 +08:00
|
|
|
chatChannelRuntime: (id: string) =>
|
|
|
|
|
`${restAPIv1}/chat-channels/${id}/runtime`,
|
Feat: chat channels — connect assistants to external messaging bots (#15850)
### What problem does this PR solve?
#15844
Adds a **Chat channels** capability so a RAGFlow assistant (Dialog) can
be exposed as a bot on external messaging platforms (Feishu/Lark,
Discord, Telegram, Slack, WeCom, LINE, etc.). An admin configures a bot
in the UI, connects it to an assistant, and inbound messages are
answered from that assistant's knowledge base — replies are delivered
back on the channel.
**Feishu/Lark is implemented and tested end-to-end.** Discord, Telegram,
LINE, and WeCom are scaffolded against the same interface; the remaining
listed channels are tracked as follow-ups.
### Design
**Backend**
- New `chat_channel` table (`tenant_id`, `name`, `channel`, `config`
JSON holding `{credential: {...}}`, `dialog_id`, `status`) +
`ChatChannelService` and RESTful CRUD under `/api/v1/chat_channels`.
- Channel framework under `api/channels/`: a `core` registry +
per-channel packages that self-register a builder and implement a common
`Channel` interface (`start`/`stop`/`send` + inbound normalization) over
`IncomingMessage`/`OutgoingMessage`.
- Embedded **reconcile loop** in `ragflow_server`
(`api/channels/bootstrap.py`): loads enabled bots, and
starts/stops/restarts them as rows change (no server restart needed).
Inbound messages run the connected dialog via the non-streaming
completion path, keeping per-end-user conversation history.
- Missing optional channel SDKs degrade gracefully (channel skipped with
a warning; others unaffected). Channel-level errors are logged, not
crashed.
- Feishu's WebSocket client runs in a dedicated thread with its own
event loop to avoid cross-loop/contextvars conflicts with the channel
runtime.
**Frontend**
- **Settings → Chat channels** panel: available-channels grid +
configured-bots list with add/edit/delete and a **Connect assistant**
popup that binds a bot to a dialog.
- Brand icons via simple-icons / reused shared data-source assets, with
colored fallbacks for brands not available.
- Route, sidebar entry, i18n (en/zh), and a top-nav segment-boundary fix
so the settings page no longer highlights the Chat tab.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### Notes
- DB: new `chat_channel` table is auto-created; `chat_channel.dialog_id`
is also covered by a `migrate_db` `alter_db_add_column` for existing
installs.
- Channel SDKs (`lark-oapi`, `discord.py`, `python-telegram-bot`,
`line-bot-sdk`, `wechatpy`, `aiohttp`) added to dependencies.
- Screenshots / per-channel credential docs to follow.
<img width="1338" height="1290" alt="Image"
src="https://github.com/user-attachments/assets/042cb2f9-0dad-4e6a-bcf7-43ced4bbd704"
/>
<img width="1344" height="738" alt="Image"
src="https://github.com/user-attachments/assets/373cd08e-ec40-4c67-9c51-4d948b1ba617"
/>
<img width="672" height="887" alt="Image"
src="https://github.com/user-attachments/assets/5a34953f-a9a3-4c1e-869e-5eff0dc64c84"
/>
---------
2026-06-12 18:21:30 +08:00
|
|
|
|
2025-05-16 16:32:19 +08:00
|
|
|
// plugin
|
2026-04-23 17:16:04 +08:00
|
|
|
llmTools: `${restAPIv1}/plugin/tools`,
|
2025-05-16 16:32:19 +08:00
|
|
|
|
2026-04-22 10:49:11 +08:00
|
|
|
// Audio transcription path, `/chat/audio/transcription`.
// NOTE(review): this entry follows the `// plugin` group without a section comment of its own.
chatsTranscriptions: `${restAPIv1}/chat/audio/transcription`,
|
2025-12-02 19:39:43 +08:00
|
|
|
|
2024-08-01 17:27:27 +08:00
|
|
|
// knowledge base
|
2025-11-14 13:56:56 +08:00
|
|
|
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index
task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index
task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log
|
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path
|
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
checkEmbedding: (datasetId: string) =>
|
Fix: restore embedding model switching for datasets with existing chunks (#14732)
### What problem does this PR solve?
## Problem
During the REST API refactoring (#13690), the
`/api/v2/kb/check_embedding` endpoint was removed and never migrated to
the new RESTful structure. The frontend was pointed to the
`/api/v1/datasets/{id}/embedding` endpoint (which is `run_embedding` — a
completely different function). Additionally, a hard guard was
introduced that rejects any `embd_id` change when `chunk_num > 0`,
making it impossible to switch embedding models on datasets with
existing chunks.
## Root Cause
1. **Missing endpoint**: The old `check_embedding` logic (sample random
chunks, re-embed with the new model, compare cosine similarity) was not
carried over to the new REST API service layer.
2. **Wrong frontend URL**: `checkEmbedding` in `api.ts` pointed to
`/datasets/{id}/embedding` (`run_embedding`) instead of a dedicated
check endpoint.
3. **Overly restrictive guard**: `dataset_api_service.py` line 310
blocked all `embd_id` updates when `chunk_num > 0`. This check did not
exist in the pre-refactor code — it was incorrectly introduced during
the refactor.
## Changes
### Backend
- **`api/apps/services/dataset_api_service.py`**
- Remove the `chunk_num > 0` hard guard on `embd_id` updates
- Add `check_embedding()` service function: samples random chunks,
re-embeds them with the candidate model, computes cosine similarity,
returns compatibility result (avg ≥ 0.9 = compatible)
- Add `import re` for the `_clean()` helper
- **`api/apps/restful_apis/dataset_api.py`**
- Add `POST /datasets/<dataset_id>/embedding/check` endpoint following
the new REST API conventions
- Clean up unused top-level imports (`random`, `re`, `numpy`)
### Frontend
- **`web/src/utils/api.ts`**
- Fix `checkEmbedding` URL from `/datasets/${datasetId}/embedding` →
`/datasets/${datasetId}/embedding/check`
### Tests
-
**`test/testcases/test_http_api/test_dataset_management/test_update_dataset.py`**
- Update `test_embedding_model_with_existing_chunks` to assert success
(`code == 0`) instead of expecting the old `102` error
-
**`test/testcases/test_web_api/test_dataset_management/test_dataset_sdk_routes_unit.py`**
- Update `test_update_route_branch_matrix_unit` to assert
`RetCode.SUCCESS` when updating `embd_id` on a chunked dataset,
replacing the old `chunk_num` error assertion
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-05-09 03:48:57 -07:00
|
|
|
`${restAPIv1}/datasets/${datasetId}/embedding/check`,
|
2026-04-13 21:07:07 +08:00
|
|
|
kbList: `${restAPIv1}/datasets`,
|
|
|
|
|
createKb: `${restAPIv1}/datasets`,
|
|
|
|
|
updateKb: (datasetId: string) => `${restAPIv1}/datasets/${datasetId}`,
|
|
|
|
|
// Same collection path as `kbList` and `createKb`; unlike `updateKb`, no dataset id is part of the URL.
rmKb: `${restAPIv1}/datasets`,
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index
task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index
task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log
|
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path
|
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
// Path of a single dataset, `/datasets/{datasetId}`; `updateKb` builds the identical path.
// `datasetId` is interpolated without URL encoding.
getKbDetail: (datasetId: string) => `${restAPIv1}/datasets/${datasetId}`,
|
2025-01-22 19:43:27 +08:00
|
|
|
getKnowledgeGraph: (knowledgeId: string) =>
|
2026-05-08 19:01:35 +08:00
|
|
|
`${restAPIv1}/datasets/${knowledgeId}/graph`,
|
2026-04-28 12:00:26 +00:00
|
|
|
knowledgeGraph: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/graph`,
|
2026-03-19 14:41:36 +08:00
|
|
|
deleteKnowledgeGraph: (knowledgeId: string) =>
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index
task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index
task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log
|
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path
|
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
`${restAPIv1}/datasets/${knowledgeId}/graph`,
|
|
|
|
|
getMeta: `${restAPIv1}/datasets/metadata/flattened`,
|
|
|
|
|
getKnowledgeBasicInfo: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/ingestions/summary`,
|
2025-10-09 12:36:19 +08:00
|
|
|
// data pipeline log
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log |
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path |
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
// Ingestion-log collection for a dataset: /api/v1/datasets/{datasetId}/ingestions.
// Builds exactly the same URL as fetchPipelineDatasetLogs.
fetchDataPipelineLog: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/ingestions`,
|
|
|
|
|
// Single ingestion-log entry: /api/v1/datasets/{datasetId}/ingestions/{logId}.
getPipelineDetail: (datasetId: string, logId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/ingestions/${logId}`,
|
|
|
|
|
// Ingestion-log collection for a dataset: /api/v1/datasets/{datasetId}/ingestions.
// NOTE(review): identical URL to fetchDataPipelineLog — the two keys look like candidates for consolidation; confirm callers first.
fetchPipelineDatasetLogs: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/ingestions`,
|
|
|
|
|
/**
 * Builds the unified index URL for a dataset:
 * `/api/v1/datasets/{datasetId}/index?type={indexType}`.
 *
 * Yields the same URL as `traceIndex`; the two are presumably told apart by
 * HTTP method at the call site (the change description lists POST = run,
 * GET = trace) — confirm against the caller.
 *
 * @param datasetId - Dataset identifier, placed in the path unchanged, like the sibling dataset routes.
 * @param indexType - Index kind (the change description lists graph, raptor and mindmap).
 *   It is lower-cased and then percent-encoded so that characters such as `&`, `=` or `#`
 *   cannot inject extra query parameters or truncate the query string.
 * @returns The relative URL string.
 */
runIndex: (datasetId: string, indexType: string) =>
  `${restAPIv1}/datasets/${datasetId}/index?type=${encodeURIComponent(indexType.toLowerCase())}`,
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log |
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path |
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
/**
 * Builds the unified index URL used to trace an index task:
 * `/api/v1/datasets/{datasetId}/index?type={indexType}`.
 *
 * Yields the same URL as `runIndex`; the two are presumably told apart by
 * HTTP method at the call site (the change description lists POST = run,
 * GET = trace) — confirm against the caller.
 *
 * @param datasetId - Dataset identifier, placed in the path unchanged, like the sibling dataset routes.
 * @param indexType - Index kind (the change description lists graph, raptor and mindmap).
 *   It is lower-cased and then percent-encoded so that characters such as `&`, `=` or `#`
 *   cannot inject extra query parameters or truncate the query string.
 * @returns The relative URL string.
 */
traceIndex: (datasetId: string, indexType: string) =>
  `${restAPIv1}/datasets/${datasetId}/index?type=${encodeURIComponent(indexType.toLowerCase())}`,
|
feat(graphrag): fix merge concurrency and add resume-from-checkpoint (#14238)
This PR addresses three related GraphRAG reliability issues that
together allow long-running GraphRAG tasks (10+ hours of LLM extraction)
to be resumed after a crash or pause without re-doing completed work. It
builds on #14096 (per-doc subgraph cache) and extends the same idea to
the resolution and community-detection phases.
Fixes #14236.
## 1. Fix concurrent merge crash
Long GraphRAG runs would crash near the end of entity resolution with:
```
RuntimeError: dictionary keys changed during iteration
```
in `Extractor._merge_graph_nodes`. Two changes:
- `rag/graphrag/general/extractor.py`: snapshot `graph.neighbors(node1)`
via `list(...)` before iterating, so concurrent `add_edge` /
`remove_node` mutations on the shared `nx.Graph` cannot invalidate the
iterator. Also tracks each redirected neighbour in `node0_neighbors` so
a later merged node sharing the same external neighbour takes the
edge-merge branch instead of overwriting via `add_edge`.
- `rag/graphrag/entity_resolution.py`: serialize the merge step with a
dedicated `asyncio.Semaphore(1)`. `nx.Graph` is not thread-safe and
concurrent merges on overlapping neighbourhoods can produce incorrect
results even with the snapshot fix.
## 2. Don't wipe partial graph on pause
Previously the pause / cancel UI path called
`settings.docStoreConn.delete({"knowledge_graph_kwd": [...]}, ...)`,
destroying every subgraph, entity, relation, and graph row.
Re-triggering then started GraphRAG from scratch even though #14096 had
already added `load_subgraph_from_store`.
After main was merged in (which deleted `api/apps/kb_app.py` per
#14394), the pause path now lives on the new REST surface `DELETE
/v1/datasets/<id>/<index_type>`:
- `api/apps/services/dataset_api_service.py`: `delete_index` accepts a
`wipe: bool = True` parameter. When `False` the doc-store rows and
GraphRAG phase markers are left intact and only the running task is
cancelled. Default preserves historical behaviour.
- `api/apps/restful_apis/dataset_api.py`: parses `?wipe=false|0|no|off`
from the query string and forwards it.
- `web/src/utils/api.ts` + `web/src/services/knowledge-service.ts`:
`unbindPipelineTask` appends `?wipe=false` when explicitly false.
- The GraphRAG pause action in
`web/src/pages/dataset/dataset/generate-button/hook.ts` passes `wipe:
false` for `KnowledgeGraph`; raptor is unchanged.
**UX impact:** the pause icon next to a running GraphRAG task no longer
wipes graph data. The only path that still wipes is the explicit Delete
action in `GenerateLogButton` (trash icon behind a confirmation modal).
## 3. Phase-completion markers (`rag/graphrag/phase_markers.py`)
A small Redis-backed marker layer at
`graphrag:phase:{kb_id}:{resolution_done|community_done}` (7-day TTL).
`run_graphrag_for_kb` consults the markers on entry and skips phases
that already completed in a prior run. Markers are cleared automatically
when:
- new docs are merged into the graph (which invalidates prior resolution
and community results),
- `delete_index` wipes the graph, or
- `delete_knowledge_graph` is called.
Redis failures never block a run -- markers are an optimization, not a
gate.
## 4. Idempotent community detection
`extract_community` previously did `delete-then-insert` on
`community_report` rows; a crash mid-insert left the dataset with no
reports. Now report IDs are derived deterministically from `(kb_id,
community.title)`, the existing report IDs are snapshotted before
insert, new rows are written, then only stale rows are pruned. A failure
at any step leaves either the prior or the new report set intact --
never a partial mix.
## 5. Tunable doc-store insert pipeline
The GraphRAG insert loop in `rag/graphrag/utils.py` and the
`community_report` insert in `rag/graphrag/general/index.py` were both
hardcoded to `es_bulk_size = 4` and ran strictly sequentially. On a real
KB this meant 1077 chunks took ~21 minutes for a 100-chunk slice -- pure
round-trip overhead.
- New `insert_chunks_bounded()` helper in `rag/graphrag/utils.py`
batches inserts via a bounded `asyncio.Semaphore`. Same retry / timeout
semantics as the prior loop.
- Defaults: 64 docs per batch, 4 batches in flight (matches the regular
ingest pipeline in `document_service.py`). Tunable per-deployment via
`GRAPHRAG_INSERT_BULK_SIZE` and `GRAPHRAG_INSERT_CONCURRENCY`.
- Both `set_graph` and `extract_community` now use the helper.
This dropped the same 1077-chunk insert from minutes to seconds in local
testing without measurable extra pressure on Infinity (total in-flight
docs ≤ `BULK_SIZE × CONCURRENCY` = 256 by default).
## Tests
- `test/unit_test/rag/graphrag/test_merge_graph_nodes.py` (3 tests):
dense neighbourhood merge, neighbour-snapshot regression, concurrent
serialized merges.
- `test/unit_test/rag/graphrag/test_phase_markers.py` (4 tests): set/has
round-trip, kb-scoped clear, no-op on empty input, graceful Redis
failure.
- `test/testcases/test_web_api/test_dataset_management/test_dataset_sdk_routes_unit.py`:
new `test_delete_index_wipe_flag_unit` covers `wipe=false` for both
GraphRAG and raptor on the new REST route, and confirms the default
still wipes and clears phase markers.
## Compatibility
- Backward compatible: tasks queued before this change behave
identically (default `wipe=true`, no markers expected).
- No schema/migration changes; all new state lives in Redis.
- New optional REST query param `wipe` on `DELETE
/v1/datasets/<id>/<index_type>`.
- New optional env vars `GRAPHRAG_INSERT_BULK_SIZE` and
`GRAPHRAG_INSERT_CONCURRENCY`; defaults preserve safe behaviour.
## Example of resume
Screenshot below shows a test resuming knowledge graph generation after
applying the concurrency fix and re-deploying.
<img width="521" height="677" alt="image"
src="https://github.com/user-attachments/assets/9ef0d405-cbb3-420d-a1a1-e51f3e7e9b7a"
/>
### Type of change
- [X] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
2026-05-06 02:01:01 -05:00
|
|
|
// Index-task URL for a dataset: /api/v1/datasets/{datasetId}/{indexType lower-cased}.
// `?wipe=false` is appended only when `wipe` is strictly `false`; `true` and `undefined`
// both produce no query string, leaving the choice to the server's default.
// NOTE(review): the change description says the server default is to wipe index data — confirm before relying on it.
unbindPipelineTask: (datasetId: string, indexType: string, wipe?: boolean) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/${indexType.toLowerCase()}${wipe === false ? '?wipe=false' : ''}`,
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log |
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path |
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
// Canvas rerun endpoint: /v1/canvas/rerun. Note this is built on the webAPI prefix, not restAPIv1.
pipelineRerun: `${webAPI}/canvas/rerun`,
|
2026-04-10 18:41:30 +08:00
|
|
|
// Metadata summary for one dataset: /api/v1/datasets/{datasetId}/metadata/summary.
getMetaData: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/metadata/summary`,
|
2026-04-23 12:04:34 +08:00
|
|
|
// Document-metadata endpoint for a dataset: /api/v1/datasets/{datasetId}/documents/metadatas
// (the path segment really is the plural "metadatas").
updateDocumentsMetadata: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/metadatas`,
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log |
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path |
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
// Dataset-level metadata config: /api/v1/datasets/{datasetId}/metadata/config.
kbUpdateMetaData: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/metadata/config`,
|
2026-04-22 20:01:31 +08:00
|
|
|
// Per-document metadata config: /api/v1/datasets/{datasetId}/documents/{documentId}/metadata/config.
documentUpdateMetaDataConfig: (datasetId: string, documentId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}/metadata/config`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
2025-01-06 18:58:42 +08:00
|
|
|
// tags
|
feat(api): add unified index API and dataset management endpoints (#14222)
### What problem does this PR solve?
## Summary
Refactor the dataset API layer into a clean service/REST separation
pattern, add a unified `/index` API for graph/raptor/mindmap operations,
and introduce several new dataset management endpoints with full test
coverage.
## Changes
### Service Layer (`dataset_api_service.py`)
- Added `trace_index(dataset_id, tenant_id, index_type)` — unified trace
function for all index types
- Added `run_index`, `delete_index` service functions
- Added `get_dataset`, `get_ingestion_summary`, `list_ingestion_logs`,
`get_ingestion_log`
- Added `run_embedding`, `list_tags`, `aggregate_tags`, `delete_tags`,
`rename_tag`
- Added `get_flattened_metadata`, `get_auto_metadata`,
`update_auto_metadata`
### REST API Layer (`dataset_api.py`)
**New unified routes:**
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Run index task |
| GET | `/datasets/<id>/index?type=graph\|raptor\|mindmap` | Trace index task |
| DELETE | `/datasets/<id>/<index_type>` | Delete index |
| GET | `/datasets/<id>` | Get dataset details |
| GET | `/datasets/<id>/ingestions/summary` | Ingestion summary |
| GET | `/datasets/<id>/ingestions` | List ingestion logs |
| GET | `/datasets/<id>/ingestions/<log_id>` | Get single ingestion log |
| POST | `/datasets/<id>/embedding` | Run embedding |
| GET | `/datasets/<id>/tags` | List tags |
| GET | `/datasets/tags/aggregation` | Aggregate tags across datasets |
| DELETE | `/datasets/<id>/tags` | Delete tags |
| PUT | `/datasets/<id>/tags` | Rename tag |
| GET | `/datasets/metadata/flattened` | Get flattened metadata |
| GET/PUT | `/datasets/<id>/metadata/config` | New metadata config path |
**Removed routes (replaced by unified `/index`):**
- `POST /datasets/<id>/mindmap`
- `GET /datasets/<id>/mindmap`
**Preserved legacy routes (backward compatibility):**
- `/run_graphrag`, `/trace_graphrag`, `/run_raptor`, `/trace_raptor`
- `/auto_metadata` GET/PUT
### Test Suite
- Updated `common.py` helpers: added `trace_index`, removed
`run_mindmap`/`trace_mindmap`
- Added 7 new test files with 39 test cases total:
| Test File | Cases |
|-----------|-------|
| `test_get_dataset.py` | 4 |
| `test_ingestion_summary.py` | 2 |
| `test_ingestion_logs.py` | 5 |
| `test_index_api.py` | 14 |
| `test_embedding.py` | 2 |
| `test_tags.py` | 8 |
| `test_flattened_metadata.py` | 4 |
- Deleted `test_mindmap_tasks.py` (covered by unified index tests)
## Design Decisions
1. **Unified `/index?type=...`** — single endpoint replaces 3 separate
route pairs for graph/raptor/mindmap
2. **Backward compatibility** — old routes (`/run_graphrag`,
`/run_raptor`, `/auto_metadata`) preserved alongside new paths
3. **`_VALID_INDEX_TYPES = {"graph", "raptor", "mindmap"}`** — input
validation via constant set
4. **`_INDEX_TYPE_TO_TASK_ID_FIELD`** — maps index type to KB model task
ID field for clean dispatch
## Files Changed
- `api/apps/restful_apis/dataset_api.py`
- `api/apps/services/dataset_api_service.py`
- `sdk/python/ragflow_sdk/modules/dataset.py`
- `test/testcases/test_http_api/common.py`
- `test/testcases/test_http_api/test_dataset_management/` (7 new files)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
---------
Signed-off-by: noob <yixiao121314@outlook.com>
2026-04-27 01:38:01 +00:00
|
|
|
// Tags of one dataset: /api/v1/datasets/{knowledgeId}/tags (same URL as removeTag and renameTag).
listTag: (knowledgeId: string) => `${restAPIv1}/datasets/${knowledgeId}/tags`,
|
|
|
|
|
// Cross-dataset tag aggregation: /api/v1/datasets/tags/aggregation.
// The dataset ids are not part of the path; presumably the caller sends them as request parameters — confirm.
listTagByKnowledgeIds: `${restAPIv1}/datasets/tags/aggregation`,
|
|
|
|
|
// Tag endpoint for one dataset: /api/v1/datasets/{knowledgeId}/tags.
// Same URL as listTag and renameTag; presumably distinguished by HTTP method at the call site — confirm.
removeTag: (knowledgeId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${knowledgeId}/tags`,
|
|
|
|
|
// Tag endpoint for one dataset: /api/v1/datasets/{knowledgeId}/tags.
// Same URL as listTag and removeTag; presumably distinguished by HTTP method at the call site — confirm.
renameTag: (knowledgeId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${knowledgeId}/tags`,
|
2025-01-06 18:58:42 +08:00
|
|
|
|
2024-08-01 17:27:27 +08:00
|
|
|
// chunk
|
2026-04-23 14:17:23 +08:00
|
|
|
// Chunk collection of a document: /api/v1/datasets/{datasetId}/documents/{documentId}/chunks.
chunkList: (datasetId: string, documentId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}/chunks`,
|
|
|
|
|
// Single chunk: /api/v1/datasets/{datasetId}/documents/{documentId}/chunks/{chunkId}.
chunkDetail: (datasetId: string, documentId: string, chunkId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}/chunks/${chunkId}`,
|
2026-05-08 20:20:09 +08:00
|
|
|
// Retrieval-test endpoint: /api/v1/datasets/search (no dataset id in the path).
retrievalTest: `${restAPIv1}/datasets/search`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
|
|
|
|
// document
|
2026-04-20 14:54:40 +08:00
|
|
|
// Document collection of a dataset: /api/v1/datasets/{datasetId}/documents.
// Same URL as documentDelete and documentUpload; presumably distinguished by HTTP method — confirm.
getDocumentList: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents`,
|
2026-04-27 20:00:23 +08:00
|
|
|
// Batch status update for a dataset's documents: /api/v1/datasets/{datasetId}/documents/batch-update-status.
documentChangeStatus: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/batch-update-status`,
|
2026-04-22 10:49:52 +08:00
|
|
|
// Document collection of a dataset: /api/v1/datasets/{datasetId}/documents.
// Same URL as getDocumentList and documentUpload; presumably distinguished by HTTP method — confirm.
documentDelete: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents`,
|
2026-04-13 21:07:07 +08:00
|
|
|
// Single document: /api/v1/datasets/{datasetId}/documents/{documentId}.
// Same URL as documentChangeParser and getDatasetDocumentFileDownload.
documentRename: (datasetId: string, documentId: string) =>
|
2026-04-09 11:17:38 +08:00
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
2026-04-27 21:25:58 +08:00
|
|
|
// Document ingestion endpoint: /api/v1/documents/ingest (not nested under a dataset).
documentIngest: `${restAPIv1}/documents/ingest`,
|
2026-04-27 10:18:16 +08:00
|
|
|
// Document collection with a fixed `type=empty` query: /api/v1/datasets/{datasetId}/documents?type=empty.
// The query string is already part of the returned URL, so callers adding parameters must append with `&`.
documentCreate: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents?type=empty`,
|
2026-04-27 23:42:57 +08:00
|
|
|
// Single document: /api/v1/datasets/{datasetId}/documents/{documentId}.
// Same URL as documentRename and getDatasetDocumentFileDownload.
documentChangeParser: (datasetId: string, documentId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
2026-05-14 10:59:06 +08:00
|
|
|
// Single document: /api/v1/datasets/{datasetId}/documents/{documentId}.
// Same URL as documentRename and documentChangeParser.
getDatasetDocumentFileDownload: (datasetId: string, documentId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
2026-04-27 21:29:09 +08:00
|
|
|
// Thumbnail endpoint: /api/v1/thumbnails.
documentThumbnails: `${restAPIv1}/thumbnails`,
|
2026-05-08 13:26:13 +08:00
|
|
|
// Base document endpoint: /api/v1/documents.
// NOTE(review): no document id in the string; presumably the caller appends one — confirm.
getDocumentFile: `${restAPIv1}/documents`,
|
2026-04-15 11:27:43 +08:00
|
|
|
// Document collection of a dataset: /api/v1/datasets/{datasetId}/documents.
// Same URL as getDocumentList and documentDelete; presumably distinguished by HTTP method — confirm.
documentUpload: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents`,
|
2026-04-27 10:18:16 +08:00
|
|
|
// Document collection with a fixed `type=web` query: /api/v1/datasets/{datasetId}/documents?type=web.
// The query string is already part of the returned URL, so callers adding parameters must append with `&`.
webCrawl: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents?type=web`,
|
2026-04-27 16:58:42 +08:00
|
|
|
// Upload endpoint outside any dataset: /api/v1/documents/upload.
documentInfoUpload: `${restAPIv1}/documents/upload`,
|
2026-04-07 15:08:11 +08:00
|
|
|
// Document set-meta endpoint: /v1/document/set_meta. Note this is built on the webAPI prefix, not restAPIv1.
setMeta: `${webAPI}/document/set_meta`,
|
2026-04-21 18:55:30 +08:00
|
|
|
// Document collection with a fixed `type=filter` query: /api/v1/datasets/{datasetId}/documents?type=filter.
// The query string is already part of the returned URL, so callers adding parameters must append with `&`.
getDatasetFilter: (datasetId: string) =>
|
|
|
|
|
`${restAPIv1}/datasets/${datasetId}/documents?type=filter`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
|
|
|
|
// chat
|
2026-04-07 15:08:11 +08:00
|
|
|
// Chat collection: /api/v1/chats (same URL as listChats and bulkDeleteChats).
createChat: `${restAPIv1}/chats`,
|
|
|
|
|
// Chat collection: /api/v1/chats (same URL as createChat and bulkDeleteChats).
listChats: `${restAPIv1}/chats`,
|
|
|
|
|
// Single chat: /api/v1/chats/{chatId} (same URL as updateChat, patchChat and deleteChat).
getChat: (chatId: string) => `${restAPIv1}/chats/${chatId}`,
|
|
|
|
|
// Single chat: /api/v1/chats/{chatId} (same URL as getChat, patchChat and deleteChat).
updateChat: (chatId: string) => `${restAPIv1}/chats/${chatId}`,
|
|
|
|
|
// Single chat: /api/v1/chats/{chatId} (same URL as getChat, updateChat and deleteChat).
patchChat: (chatId: string) => `${restAPIv1}/chats/${chatId}`,
|
|
|
|
|
// Single chat: /api/v1/chats/{chatId} (same URL as getChat, updateChat and patchChat).
deleteChat: (chatId: string) => `${restAPIv1}/chats/${chatId}`,
|
|
|
|
|
// Chat collection: /api/v1/chats (same URL as createChat and listChats).
// The chats to delete are not in the path; presumably sent in the request by the caller — confirm.
bulkDeleteChats: `${restAPIv1}/chats`,
|
|
|
|
|
// Session collection of a chat: /api/v1/chats/{chatId}/sessions (same URL as listSessions and removeSessions).
createSession: (chatId: string) => `${restAPIv1}/chats/${chatId}/sessions`,
|
|
|
|
|
// Session collection of a chat: /api/v1/chats/{chatId}/sessions (same URL as createSession and removeSessions).
listSessions: (chatId: string) => `${restAPIv1}/chats/${chatId}/sessions`,
|
2026-04-02 20:49:23 +08:00
|
|
|
// Single session: /api/v1/chats/{chatId}/sessions/{sessionId} (same URL as updateSession).
getSession: (chatId: string, sessionId: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/chats/${chatId}/sessions/${sessionId}`,
|
2026-04-02 20:49:23 +08:00
|
|
|
// Single session: /api/v1/chats/{chatId}/sessions/{sessionId} (same URL as getSession).
updateSession: (chatId: string, sessionId: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/chats/${chatId}/sessions/${sessionId}`,
|
|
|
|
|
// Session collection of a chat: /api/v1/chats/{chatId}/sessions (same URL as createSession and listSessions).
// The sessions to remove are not in the path; presumably sent in the request by the caller — confirm.
removeSessions: (chatId: string) => `${restAPIv1}/chats/${chatId}/sessions`,
|
2026-04-02 20:49:23 +08:00
|
|
|
// Single message in a session: /api/v1/chats/{chatId}/sessions/{sessionId}/messages/{msgId}.
deleteMessage: (chatId: string, sessionId: string, msgId: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/chats/${chatId}/sessions/${sessionId}/messages/${msgId}`,
|
2026-04-02 20:49:23 +08:00
|
|
|
// Feedback endpoint for one message: /api/v1/chats/{chatId}/sessions/{sessionId}/messages/{msgId}/feedback.
thumbup: (chatId: string, sessionId: string, msgId: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/chats/${chatId}/sessions/${sessionId}/messages/${msgId}/feedback`,
|
2026-04-22 10:49:11 +08:00
|
|
|
// Chat completion endpoint: /api/v1/chat/completions (singular "chat", unlike the /chats resource routes).
completionUrl: `${restAPIv1}/chat/completions`,
|
|
|
|
|
// Speech-audio endpoint: /api/v1/chat/audio/speech.
chatsTts: `${restAPIv1}/chat/audio/speech`,
|
|
|
|
|
// Completion endpoint scoped to a search: /api/v1/searches/{searchId}/completions.
searchCompletion: (searchId: string) =>
|
2026-05-06 17:19:22 +08:00
|
|
|
`${restAPIv1}/searches/${searchId}/completions`,
|
2026-04-22 10:49:11 +08:00
|
|
|
// Chat mind-map endpoint: /api/v1/chat/mindmap.
chatsMindmap: `${restAPIv1}/chat/mindmap`,
|
2026-04-28 12:55:16 +08:00
|
|
|
// Related-questions endpoint; note the path segment is "recommendation": /api/v1/chat/recommendation.
chatsRelatedQuestions: `${restAPIv1}/chat/recommendation`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
2025-08-06 11:42:40 +08:00
|
|
|
// next chat
|
2026-04-07 15:08:11 +08:00
|
|
|
// Info endpoint for a chatbot: /api/v1/chatbots/{id}/info.
fetchExternalChatInfo: (id: string) => `${restAPIv1}/chatbots/${id}/info`,
|
2025-08-06 11:42:40 +08:00
|
|
|
|
2024-08-01 17:27:27 +08:00
|
|
|
// file manager
|
2026-04-07 15:08:11 +08:00
|
|
|
// File collection: /api/v1/files. uploadFile, removeFile, getAllParentFolder, createFolder and getFile use this same URL.
listFile: `${restAPIv1}/files`,
|
|
|
|
|
// File collection: /api/v1/files (same URL as listFile).
uploadFile: `${restAPIv1}/files`,
|
|
|
|
|
// File collection: /api/v1/files (same URL as listFile).
removeFile: `${restAPIv1}/files`,
|
|
|
|
|
// File collection: /api/v1/files (same URL as listFile).
// NOTE(review): nothing in the string selects "parent folders"; presumably the caller adds a path or parameters — confirm.
getAllParentFolder: `${restAPIv1}/files`,
|
|
|
|
|
// File collection: /api/v1/files (same URL as listFile).
createFolder: `${restAPIv1}/files`,
|
2026-04-23 11:40:45 +08:00
|
|
|
// Link files to datasets: /api/v1/files/link-to-datasets.
connectFileToKnowledge: `${restAPIv1}/files/link-to-datasets`,
|
2026-04-07 15:08:11 +08:00
|
|
|
// File collection base: /api/v1/files (same URL as listFile).
// NOTE(review): no file id in the string; presumably the caller appends one — confirm.
getFile: `${restAPIv1}/files`,
|
|
|
|
|
// File move endpoint: /api/v1/files/move.
moveFile: `${restAPIv1}/files/move`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
|
|
|
|
// system
|
2026-04-07 19:07:47 +08:00
|
|
|
// System version endpoint: /api/v1/system/version.
getSystemVersion: `${restAPIv1}/system/version`,
|
2026-04-08 15:26:18 +08:00
|
|
|
// System token collection: /api/v1/system/tokens (same URL as createSystemToken and removeSystemToken).
getSystemTokenList: `${restAPIv1}/system/tokens`,
|
|
|
|
|
// System token collection: /api/v1/system/tokens (same URL as getSystemTokenList and removeSystemToken).
createSystemToken: `${restAPIv1}/system/tokens`,
|
|
|
|
|
// System token collection: /api/v1/system/tokens (same URL as getSystemTokenList and createSystemToken).
// NOTE(review): no token id in the string; presumably the caller appends or sends it — confirm.
removeSystemToken: `${restAPIv1}/system/tokens`,
|
2026-04-23 14:09:42 +08:00
|
|
|
// System configuration endpoint: /api/v1/system/config.
getSystemConfig: `${restAPIv1}/system/config`,
|
2026-04-23 17:16:32 +08:00
|
|
|
// Langfuse API-key endpoint: /api/v1/langfuse/api-key.
setLangfuseConfig: `${restAPIv1}/langfuse/api-key`,
|
2024-08-01 17:27:27 +08:00
|
|
|
|
|
|
|
|
// flow
|
2026-04-24 10:02:22 +08:00
|
|
|
listAgentTemplate: `${restAPIv1}/agents/templates`,
|
|
|
|
|
listAgents: `${restAPIv1}/agents`,
|
feat: add tag management for Agents with filtering and sorting (#14774) (#14799)
## Summary
Closes #14774.
Adds free-form tags on agents (UserCanvas) with full UI + API:
- Stored as comma-separated `tags` column on `UserCanvas` with online
migration.
- New endpoints: `GET /v1/agents/tags` (aggregate counts) and `PUT
/v1/agent/<id>/tags` (write). `GET /v1/agents` accepts a `tags=` query.
- "Edit tags" item in agent dropdown opens a chip-style editor dialog;
tags render as badges on each agent card.
- New "Tags" facet in the agents filter bar, with counts.
## Implementation notes
- **Tag matching is exact-token**: the SQL filter wraps stored tags as
`,…,` and matches `,ml,` so `ml` doesn't match `ml-ops`.
- **Server-side normalization** in `UserCanvasService.update_tags`:
dedup (case-insensitive), per-tag cap of 64 chars, total length capped
at 512 chars to fit the column, commas inside tag values are replaced
with spaces.
- **Tenant authorization**: `PUT /v1/agent/<id>/tags` gates on
`UserCanvasService.accessible(canvas_id, tenant_id)`.
- **Tag listing scope**: `UserCanvasService.list_tags` follows the same
own + team-shared rule as `get_by_tenant_ids`.
- **i18n**: keys added to `en.ts` and `zh.ts` only (per project
convention; other locales fall back).
- **`HomeCard`** gets a non-breaking `extra?: ReactNode` slot for the
chip row; no `src/components/ui/` files modified.
## Test plan
- [ ] Backend boot runs `migrate_db` → confirm `user_canvas.tags` column
exists (`DESCRIBE user_canvas`).
- [ ] Agents page renders cards normally (no console error from missing
field).
- [ ] `⋯ → Edit tags` opens a dialog that stays open (regression: dialog
was unmounting with the dropdown).
- [ ] Typing a tag without pressing Enter and clicking Save persists it
(regression: last typed tag was being dropped).
- [ ] Chip input supports Enter/comma to commit, Backspace on empty to
remove, `×` to remove individual chip.
- [ ] Tag containing a comma sent via API is stored with the comma
replaced by a space.
- [ ] Sending 20 long tags via the API does not error (the length cap silently
truncates).
- [ ] "Tags" filter in the filter bar shows counts and narrows the list.
- [ ] Filtering by `ml` does **not** return agents tagged `ml-ops`.
- [ ] UI in Chinese shows 编辑标签 / 添加标签以整理和筛选你的智能体 etc.
- [ ] `PUT /v1/agent/<other-tenant-id>/tags` returns `Agent not found or
no permission.`
2026-05-13 06:41:32 -07:00
|
|
|
listAgentTags: `${restAPIv1}/agents/tags`,
|
2026-05-15 12:29:52 +08:00
|
|
|
// Builds the tags sub-resource URL for one agent: `${restAPIv1}/agents/<agentId>/tags`.
// `agentId` is interpolated into the path as-is.
updateAgentTags: (agentId: string) => `${restAPIv1}/agents/${agentId}/tags`,
|
2026-04-24 10:02:22 +08:00
|
|
|
createAgent: `${restAPIv1}/agents`,
|
|
|
|
|
// Builds the URL of a single agent resource: `${restAPIv1}/agents/<agentId>`.
// `deleteAgent` returns exactly the same path; presumably the two operations
// are told apart by HTTP method — confirm in the calling service.
updateAgent: (agentId: string) => `${restAPIv1}/agents/${agentId}`,
|
|
|
|
|
deleteAgent: (agentId: string) => `${restAPIv1}/agents/${agentId}`,
|
2026-05-06 17:19:22 +08:00
|
|
|
agentChatCompletion: `${restAPIv1}/agents/chat/completions`,
|
2026-04-24 10:02:22 +08:00
|
|
|
resetAgent: (agentId: string) => `${restAPIv1}/agents/${agentId}/reset`,
|
|
|
|
|
testDbConnect: `${restAPIv1}/agents/test_db_connection`,
|
2026-04-07 15:08:11 +08:00
|
|
|
// NOTE(review): this entry is built on `webAPI` (`/v1`) and a `canvas/` path,
// whereas the surrounding agent endpoints are built on `restAPIv1` (`/api/v1`)
// under `agents/`. Confirm whether it is intentionally left on the older prefix.
getInputElements: `${webAPI}/canvas/input_elements`,
|
2026-04-24 10:02:22 +08:00
|
|
|
debug: (agentId: string, componentId: string) =>
|
|
|
|
|
`${restAPIv1}/agents/${agentId}/components/${componentId}/debug`,
|
|
|
|
|
trace: (agentId: string, messageId: string) =>
|
|
|
|
|
`${restAPIv1}/agents/${agentId}/logs/${messageId}`,
|
2026-04-27 19:16:37 +08:00
|
|
|
// Builds the cancel URL for a task: `${restAPIv1}/tasks/<taskId>/cancel`.
// `cancelDataflow` produces the identical path (only the parameter name differs).
cancelCanvas: (taskId: string) => `${restAPIv1}/tasks/${taskId}/cancel`,
|
2025-07-18 17:54:32 +08:00
|
|
|
// agent
|
2026-04-24 10:02:22 +08:00
|
|
|
inputForm: (agentId: string, componentId: string) =>
|
|
|
|
|
`${restAPIv1}/agents/${agentId}/components/${componentId}/input-form`,
|
|
|
|
|
fetchVersionList: (id: string) => `${restAPIv1}/agents/${id}/versions`,
|
|
|
|
|
fetchVersion: (agentId: string, versionId: string) =>
|
|
|
|
|
`${restAPIv1}/agents/${agentId}/versions/${versionId}`,
|
|
|
|
|
getAgent: (id: string) => `${restAPIv1}/agents/${id}`,
|
|
|
|
|
// Builds the upload URL for an agent: `${restAPIv1}/agents/<id>/upload`.
// `id` is declared optional; when it is omitted the template literal
// interpolates the text "undefined", yielding `.../agents/undefined/upload`.
// NOTE(review): confirm every caller passes an id, or whether an id-less
// upload route was intended here.
uploadAgentFile: (id?: string) => `${restAPIv1}/agents/${id}/upload`,
|
|
|
|
|
createAgentSession: (agentId: string) =>
|
|
|
|
|
`${restAPIv1}/agents/${agentId}/sessions`,
|
2026-04-07 15:08:11 +08:00
|
|
|
// Builds `${webAPI}/canvas/<canvasId>/sessions`.
// NOTE(review): this uses the `webAPI` (`/v1`) prefix and a `canvas/` path,
// while `fetchAgentSessions` / `createAgentSession` use
// `${restAPIv1}/agents/<agentId>/sessions`. Confirm both routes are still
// needed and that this one has not been superseded.
fetchAgentLogs: (canvasId: string) => `${webAPI}/canvas/${canvasId}/sessions`,
|
2026-04-24 10:02:22 +08:00
|
|
|
fetchAgentSessions: (agentId: string) =>
|
|
|
|
|
`${restAPIv1}/agents/${agentId}/sessions`,
|
|
|
|
|
fetchAgentSessionById: (agentId: string, sessionId: string) =>
|
|
|
|
|
`${restAPIv1}/agents/${agentId}/sessions/${sessionId}`,
|
2025-07-31 09:34:45 +08:00
|
|
|
fetchExternalAgentInputs: (canvasId: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/agentbots/${canvasId}/inputs`,
|
2026-04-24 10:02:22 +08:00
|
|
|
prompt: `${restAPIv1}/agents/prompts`,
|
2026-04-27 19:16:37 +08:00
|
|
|
// Builds the cancel URL for a task: `${restAPIv1}/tasks/<id>/cancel`.
// This is the same path that `cancelCanvas` returns.
cancelDataflow: (id: string) => `${restAPIv1}/tasks/${id}/cancel`,
|
2026-05-22 15:22:05 +08:00
|
|
|
getAttachmentFileDownload: (docId: string) =>
|
2026-05-25 17:11:24 +08:00
|
|
|
`${restAPIv1}/agents/attachments/${docId}/download`,
|
2026-04-24 10:02:22 +08:00
|
|
|
downloadFile: `${restAPIv1}/agents/download`,
|
2026-04-24 17:55:53 +08:00
|
|
|
testWebhook: (id: string) => `${restAPIv1}/agents/${id}/webhook/test`,
|
|
|
|
|
fetchWebhookTrace: (id: string) => `${restAPIv1}/agents/${id}/webhook/logs`,
|
2025-06-23 17:45:35 +08:00
|
|
|
|
2026-02-09 19:53:51 +08:00
|
|
|
// explore
|
|
|
|
|
|
2025-06-23 17:45:35 +08:00
|
|
|
// mcp server
|
2026-04-23 12:51:27 +08:00
|
|
|
listMcpServer: `${restAPIv1}/mcp/servers`,
|
|
|
|
|
// Builds the URL of a single MCP server resource: `${restAPIv1}/mcp/servers/<id>`.
// `updateMcpServer` and `deleteMcpServer` return the same path, and
// `exportMcpServer` appends `?mode=download` to it; presumably read/update/delete
// are distinguished by HTTP method — confirm in the calling service.
getMcpServer: (id: string) => `${restAPIv1}/mcp/servers/${id}`,
|
|
|
|
|
createMcpServer: `${restAPIv1}/mcp/servers`,
|
|
|
|
|
updateMcpServer: (id: string) => `${restAPIv1}/mcp/servers/${id}`,
|
|
|
|
|
deleteMcpServer: (id: string) => `${restAPIv1}/mcp/servers/${id}`,
|
|
|
|
|
importMcpServer: `${restAPIv1}/mcp/servers/import`,
|
|
|
|
|
exportMcpServer: (id: string) =>
|
|
|
|
|
`${restAPIv1}/mcp/servers/${id}?mode=download`,
|
|
|
|
|
testMcpServer: (id: string) => `${restAPIv1}/mcp/servers/${id}/test`,
|
2025-08-11 10:34:22 +08:00
|
|
|
|
|
|
|
|
// next-search
|
2026-04-07 15:08:11 +08:00
|
|
|
createSearch: `${restAPIv1}/searches`,
|
|
|
|
|
getSearchList: `${restAPIv1}/searches`,
|
2026-03-26 01:07:41 +08:00
|
|
|
deleteSearch: (params: { search_id: string }) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/searches/${params.search_id}`,
|
2026-03-26 01:07:41 +08:00
|
|
|
getSearchDetail: (params: { search_id: string }) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/searches/${params.search_id}`,
|
|
|
|
|
getSearchDetailShare: `${restAPIv1}/searchbots/detail`,
|
2026-03-26 01:07:41 +08:00
|
|
|
updateSearchSetting: (params: { search_id: string }) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/searches/${params.search_id}`,
|
|
|
|
|
askShare: `${restAPIv1}/searchbots/ask`,
|
|
|
|
|
mindmapShare: `${restAPIv1}/searchbots/mindmap`,
|
|
|
|
|
getRelatedQuestionsShare: `${restAPIv1}/searchbots/related_questions`,
|
|
|
|
|
retrievalTestShare: `${restAPIv1}/searchbots/retrieval_test`,
|
2025-10-09 12:36:19 +08:00
|
|
|
|
2025-12-08 10:17:56 +08:00
|
|
|
// memory
|
2026-04-07 15:08:11 +08:00
|
|
|
createMemory: `${restAPIv1}/memories`,
|
|
|
|
|
getMemoryList: `${restAPIv1}/memories`,
|
|
|
|
|
getMemoryConfig: (id: string) => `${restAPIv1}/memories/${id}/config`,
|
|
|
|
|
deleteMemory: (id: string) => `${restAPIv1}/memories/${id}`,
|
|
|
|
|
getMemoryDetail: (id: string) => `${restAPIv1}/memories/${id}`,
|
|
|
|
|
updateMemorySetting: (id: string) => `${restAPIv1}/memories/${id}`,
|
2025-12-17 12:35:26 +08:00
|
|
|
deleteMemoryMessage: (data: { memory_id: string; message_id: string }) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/messages/${data.memory_id}:${data.message_id}`,
|
2025-12-17 12:35:26 +08:00
|
|
|
getMessageContent: (data: { memory_id: string; message_id: string }) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/messages/${data.memory_id}:${data.message_id}/content`,
|
2025-12-17 12:35:26 +08:00
|
|
|
updateMessageState: (data: { memory_id: string; message_id: string }) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/messages/${data.memory_id}:${data.message_id}`,
|
2025-12-08 10:17:56 +08:00
|
|
|
|
2025-10-09 12:36:19 +08:00
|
|
|
// data pipeline
|
2026-04-07 15:08:11 +08:00
|
|
|
// Builds `${webAPI}/dataflow/get/<id>`.
// NOTE(review): the data-pipeline entries (`fetchDataflow`, `setDataflow`,
// `removeDataflow`, `listDataflow`, `runDataflow`) are all built on `webAPI`
// (`/v1`) with verb-style paths, unlike most of this map which uses
// `restAPIv1` (`/api/v1`). Confirm these are intentionally on the older prefix.
fetchDataflow: (id: string) => `${webAPI}/dataflow/get/${id}`,
|
|
|
|
|
setDataflow: `${webAPI}/dataflow/set`,
|
|
|
|
|
removeDataflow: `${webAPI}/dataflow/rm`,
|
|
|
|
|
listDataflow: `${webAPI}/dataflow/list`,
|
|
|
|
|
runDataflow: `${webAPI}/dataflow/run`,
|
2025-10-28 22:25:43 +08:00
|
|
|
|
|
|
|
|
// admin
|
2026-04-07 15:08:11 +08:00
|
|
|
adminLogin: `${restAPIv1}/admin/login`,
|
|
|
|
|
adminLogout: `${restAPIv1}/admin/logout`,
|
|
|
|
|
adminListUsers: `${restAPIv1}/admin/users`,
|
|
|
|
|
adminCreateUser: `${restAPIv1}/admin/users`,
|
2026-01-23 18:08:46 +08:00
|
|
|
adminSetSuperuser: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}/admin`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminGetUserDetails: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminUpdateUserStatus: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}/activate`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminUpdateUserPassword: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}/password`,
|
|
|
|
|
// Builds the URL of a single admin user resource: `${restAPIv1}/admin/users/<username>`.
// `username` is placed in the path without any escaping.
// NOTE(review): if usernames can contain characters such as `/`, `?` or `#`,
// the resulting URL would be malformed — confirm callers encode the value
// first or that such characters cannot occur.
adminDeleteUser: (username: string) => `${restAPIv1}/admin/users/${username}`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminListUserDatasets: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}/datasets`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminListUserAgents: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}/agents`,
|
2025-10-28 22:25:43 +08:00
|
|
|
|
2026-04-07 15:08:11 +08:00
|
|
|
adminListServices: `${restAPIv1}/admin/services`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminShowServiceDetails: (serviceId: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/services/${serviceId}`,
|
2025-10-28 22:25:43 +08:00
|
|
|
|
2026-04-07 15:08:11 +08:00
|
|
|
adminListRoles: `${restAPIv1}/admin/roles`,
|
|
|
|
|
adminListRolesWithPermission: `${restAPIv1}/admin/roles_with_permission`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminGetRolePermissions: (roleName: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/roles/${roleName}/permissions`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminAssignRolePermissions: (roleName: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/roles/${roleName}/permission`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminRevokeRolePermissions: (roleName: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/roles/${roleName}/permission`,
|
|
|
|
|
adminCreateRole: `${restAPIv1}/admin/roles`,
|
|
|
|
|
// Builds the URL of a single role resource: `${restAPIv1}/admin/roles/<roleName>`.
// `roleName` is placed in the path without any escaping.
// NOTE(review): `adminListResources` uses the literal path
// `${restAPIv1}/admin/roles/resource`, which has the same shape as this URL for
// a role named "resource" — confirm the backend distinguishes the two
// (e.g. by HTTP method).
adminDeleteRole: (roleName: string) => `${restAPIv1}/admin/roles/${roleName}`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminUpdateRoleDescription: (roleName: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/roles/${roleName}`,
|
2025-10-28 22:25:43 +08:00
|
|
|
|
|
|
|
|
adminUpdateUserRole: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}/role`,
|
2025-10-28 22:25:43 +08:00
|
|
|
adminGetUserPermissions: (username: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/users/${username}/permissions`,
|
2025-10-28 22:25:43 +08:00
|
|
|
|
2026-04-07 15:08:11 +08:00
|
|
|
adminListResources: `${restAPIv1}/admin/roles/resource`,
|
2025-11-03 09:52:23 +08:00
|
|
|
|
2026-04-07 15:08:11 +08:00
|
|
|
adminListWhitelist: `${restAPIv1}/admin/whitelist`,
|
|
|
|
|
adminCreateWhitelistEntry: `${restAPIv1}/admin/whitelist/add`,
|
2025-11-03 09:52:23 +08:00
|
|
|
adminUpdateWhitelistEntry: (id: number) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/whitelist/${id}`,
|
2025-11-03 09:52:23 +08:00
|
|
|
adminDeleteWhitelistEntry: (email: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/whitelist/${email}`,
|
|
|
|
|
adminImportWhitelist: `${restAPIv1}/admin/whitelist/batch`,
|
2025-11-11 15:20:37 +08:00
|
|
|
|
2026-04-07 15:08:11 +08:00
|
|
|
adminGetSystemVersion: `${restAPIv1}/admin/version`,
|
feat: Implement pluggable multi-provider sandbox architecture (#12820)
## Summary
Implement a flexible sandbox provider system supporting both
self-managed (Docker) and SaaS (Aliyun Code Interpreter) backends for
secure code execution in agent workflows.
**Key Changes:**
- ✅ Aliyun Code Interpreter provider using official
`agentrun-sdk>=0.0.16`
- ✅ Self-managed provider with gVisor (runsc) security
- ✅ Arguments parameter support for dynamic code execution
- ✅ Database-only configuration (removed fallback logic)
- ✅ Configuration scripts for quick setup
Issue #12479
## Features
### 🔌 Provider Abstraction Layer
**1. Self-Managed Provider** (`agent/sandbox/providers/self_managed.py`)
- Wraps existing executor_manager HTTP API
- gVisor (runsc) for secure container isolation
- Configurable pool size, timeout, retry logic
- Languages: Python, Node.js, JavaScript
- ⚠️ **Requires**: gVisor installation, Docker, base images
**2. Aliyun Code Interpreter**
(`agent/sandbox/providers/aliyun_codeinterpreter.py`)
- SaaS integration using official agentrun-sdk
- Serverless microVM execution with auto-authentication
- Hard timeout: 30 seconds max
- Credentials: `AGENTRUN_ACCESS_KEY_ID`, `AGENTRUN_ACCESS_KEY_SECRET`,
`AGENTRUN_ACCOUNT_ID`, `AGENTRUN_REGION`
- Automatically wraps code to call `main()` function
**3. E2B Provider** (`agent/sandbox/providers/e2b.py`)
- Placeholder for future integration
### ⚙️ Configuration System
- `conf/system_settings.json`: Default provider =
`aliyun_codeinterpreter`
- `agent/sandbox/client.py`: Enforces database-only configuration
- Admin UI: `/admin/sandbox-settings`
- Configuration validation via `validate_config()` method
- Health checks for all providers
### 🎯 Key Capabilities
**Arguments Parameter Support:**
All providers support passing arguments to `main()` function:
```python
# User code
def main(name: str, count: int) -> dict:
return {"message": f"Hello {name}!" * count}
# Executed with: arguments={"name": "World", "count": 3}
# Result: {"message": "Hello World!Hello World!Hello World!"}
```
**Self-Describing Providers:**
Each provider implements `get_config_schema()` returning form
configuration for Admin UI
**Error Handling:**
Structured `ExecutionResult` with stdout, stderr, exit_code,
execution_time
## Configuration Scripts
Two scripts for quick Aliyun sandbox setup:
**Shell Script (requires jq):**
```bash
source scripts/configure_aliyun_sandbox.sh
```
**Python Script (interactive):**
```bash
python3 scripts/configure_aliyun_sandbox.py
```
## Testing
```bash
# Unit tests
uv run pytest agent/sandbox/tests/test_providers.py -v
# Aliyun provider tests
uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter.py -v
# Integration tests (requires credentials)
uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v
# Quick SDK validation
python3 agent/sandbox/tests/verify_sdk.py
```
**Test Coverage:**
- 30 unit tests for provider abstraction
- Provider-specific tests for Aliyun
- Integration tests with real API
- Security tests for executor_manager
## Documentation
- `docs/develop/sandbox_spec.md` - Complete architecture specification
- `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration from legacy
sandbox
- `agent/sandbox/tests/QUICKSTART.md` - Quick start guide
- `agent/sandbox/tests/README.md` - Testing documentation
## Breaking Changes
⚠️ **Migration Required:**
1. **Directory Move**: `sandbox/` → `agent/sandbox/`
- Update imports: `from sandbox.` → `from agent.sandbox.`
2. **Mandatory Configuration**:
- SystemSettings must have `sandbox.provider_type` configured
- Removed fallback default values
- Configuration must exist in database (from
`conf/system_settings.json`)
3. **Aliyun Credentials**:
- Requires `AGENTRUN_*` environment variables (not `ALIYUN_*`)
- `AGENTRUN_ACCOUNT_ID` is now required (Aliyun primary account ID)
4. **Self-Managed Provider**:
- gVisor (runsc) must be installed for security
- Install: `go install gvisor.dev/gvisor/runsc@latest`
## Database Schema Changes
```python
# SystemSettings.value: CharField → TextField
api/db/db_models.py: Changed for unlimited config length
# SystemSettingsService.get_by_name(): Fixed query precision
api/db/services/system_settings_service.py: startswith → exact match
```
## Files Changed
### Backend (Python)
- `agent/sandbox/providers/base.py` - SandboxProvider ABC interface
- `agent/sandbox/providers/manager.py` - ProviderManager
- `agent/sandbox/providers/self_managed.py` - Self-managed provider
- `agent/sandbox/providers/aliyun_codeinterpreter.py` - Aliyun provider
- `agent/sandbox/providers/e2b.py` - E2B provider (placeholder)
- `agent/sandbox/client.py` - Unified client (enforces DB-only config)
- `agent/tools/code_exec.py` - Updated to use provider system
- `admin/server/services.py` - SandboxMgr with registry & validation
- `admin/server/routes.py` - 5 sandbox API endpoints
- `conf/system_settings.json` - Default: aliyun_codeinterpreter
- `api/db/db_models.py` - TextField for SystemSettings.value
- `api/db/services/system_settings_service.py` - Exact match query
### Frontend (TypeScript/React)
- `web/src/pages/admin/sandbox-settings.tsx` - Settings UI
- `web/src/services/admin-service.ts` - Sandbox service functions
- `web/src/services/admin.service.d.ts` - Type definitions
- `web/src/utils/api.ts` - Sandbox API endpoints
### Documentation
- `docs/develop/sandbox_spec.md` - Architecture spec
- `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration guide
- `agent/sandbox/tests/QUICKSTART.md` - Quick start
- `agent/sandbox/tests/README.md` - Testing guide
### Configuration Scripts
- `scripts/configure_aliyun_sandbox.sh` - Shell script (jq)
- `scripts/configure_aliyun_sandbox.py` - Python script
### Tests
- `agent/sandbox/tests/test_providers.py` - 30 unit tests
- `agent/sandbox/tests/test_aliyun_codeinterpreter.py` - Provider tests
- `agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py` -
Integration tests
- `agent/sandbox/tests/verify_sdk.py` - SDK validation
## Architecture
```
Admin UI → Admin API → SandboxMgr → ProviderManager → [SelfManaged|Aliyun|E2B]
↓
SystemSettings
```
## Usage
### 1. Configure Provider
**Via Admin UI:**
1. Navigate to `/admin/sandbox-settings`
2. Select provider (Aliyun Code Interpreter / Self-Managed)
3. Fill in configuration
4. Click "Test Connection" to verify
5. Click "Save" to apply
**Via Configuration Scripts:**
```bash
# Aliyun provider
export AGENTRUN_ACCESS_KEY_ID="xxx"
export AGENTRUN_ACCESS_KEY_SECRET="yyy"
export AGENTRUN_ACCOUNT_ID="zzz"
export AGENTRUN_REGION="cn-shanghai"
source scripts/configure_aliyun_sandbox.sh
```
### 2. Restart Service
```bash
cd docker
docker compose restart ragflow-server
```
### 3. Execute Code in Agent
```python
from agent.sandbox.client import execute_code
result = execute_code(
code='def main(name: str) -> dict: return {"message": f"Hello {name}!"}',
language="python",
timeout=30,
arguments={"name": "World"}
)
print(result.stdout) # {"message": "Hello World!"}
```
## Troubleshooting
### "Container pool is busy" (Self-Managed)
- **Cause**: Pool exhausted (default: 1 container in `.env`)
- **Fix**: Increase `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE` to 5+
### "Sandbox provider type not configured"
- **Cause**: Database missing configuration
- **Fix**: Run config script or set via Admin UI
### "gVisor not found"
- **Cause**: runsc not installed
- **Fix**: `go install gvisor.dev/gvisor/runsc@latest && sudo cp
~/go/bin/runsc /usr/local/bin/`
### Aliyun authentication errors
- **Cause**: Wrong environment variable names
- **Fix**: Use `AGENTRUN_*` prefix (not `ALIYUN_*`)
## Checklist
- [x] All tests passing (30 unit tests + integration tests)
- [x] Documentation updated (spec, migration guide, quickstart)
- [x] Type definitions added (TypeScript)
- [x] Admin UI implemented
- [x] Configuration validation
- [x] Health checks implemented
- [x] Error handling with structured results
- [x] Breaking changes documented
- [x] Configuration scripts created
- [x] gVisor requirements documented
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-28 13:28:21 +08:00
|
|
|
|
|
|
|
|
// Sandbox settings
|
2026-04-07 15:08:11 +08:00
|
|
|
adminListSandboxProviders: `${restAPIv1}/admin/sandbox/providers`,
|
feat: Implement pluggable multi-provider sandbox architecture (#12820)
## Summary
Implement a flexible sandbox provider system supporting both
self-managed (Docker) and SaaS (Aliyun Code Interpreter) backends for
secure code execution in agent workflows.
**Key Changes:**
- ✅ Aliyun Code Interpreter provider using official
`agentrun-sdk>=0.0.16`
- ✅ Self-managed provider with gVisor (runsc) security
- ✅ Arguments parameter support for dynamic code execution
- ✅ Database-only configuration (removed fallback logic)
- ✅ Configuration scripts for quick setup
Issue #12479
## Features
### 🔌 Provider Abstraction Layer
**1. Self-Managed Provider** (`agent/sandbox/providers/self_managed.py`)
- Wraps existing executor_manager HTTP API
- gVisor (runsc) for secure container isolation
- Configurable pool size, timeout, retry logic
- Languages: Python, Node.js, JavaScript
- ⚠️ **Requires**: gVisor installation, Docker, base images
**2. Aliyun Code Interpreter**
(`agent/sandbox/providers/aliyun_codeinterpreter.py`)
- SaaS integration using official agentrun-sdk
- Serverless microVM execution with auto-authentication
- Hard timeout: 30 seconds max
- Credentials: `AGENTRUN_ACCESS_KEY_ID`, `AGENTRUN_ACCESS_KEY_SECRET`,
`AGENTRUN_ACCOUNT_ID`, `AGENTRUN_REGION`
- Automatically wraps code to call `main()` function
**3. E2B Provider** (`agent/sandbox/providers/e2b.py`)
- Placeholder for future integration
### ⚙️ Configuration System
- `conf/system_settings.json`: Default provider =
`aliyun_codeinterpreter`
- `agent/sandbox/client.py`: Enforces database-only configuration
- Admin UI: `/admin/sandbox-settings`
- Configuration validation via `validate_config()` method
- Health checks for all providers
### 🎯 Key Capabilities
**Arguments Parameter Support:**
All providers support passing arguments to `main()` function:
```python
# User code
def main(name: str, count: int) -> dict:
return {"message": f"Hello {name}!" * count}
# Executed with: arguments={"name": "World", "count": 3}
# Result: {"message": "Hello World!Hello World!Hello World!"}
```
**Self-Describing Providers:**
Each provider implements `get_config_schema()` returning form
configuration for Admin UI
**Error Handling:**
Structured `ExecutionResult` with stdout, stderr, exit_code,
execution_time
## Configuration Scripts
Two scripts for quick Aliyun sandbox setup:
**Shell Script (requires jq):**
```bash
source scripts/configure_aliyun_sandbox.sh
```
**Python Script (interactive):**
```bash
python3 scripts/configure_aliyun_sandbox.py
```
## Testing
```bash
# Unit tests
uv run pytest agent/sandbox/tests/test_providers.py -v
# Aliyun provider tests
uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter.py -v
# Integration tests (requires credentials)
uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v
# Quick SDK validation
python3 agent/sandbox/tests/verify_sdk.py
```
**Test Coverage:**
- 30 unit tests for provider abstraction
- Provider-specific tests for Aliyun
- Integration tests with real API
- Security tests for executor_manager
## Documentation
- `docs/develop/sandbox_spec.md` - Complete architecture specification
- `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration from legacy
sandbox
- `agent/sandbox/tests/QUICKSTART.md` - Quick start guide
- `agent/sandbox/tests/README.md` - Testing documentation
## Breaking Changes
⚠️ **Migration Required:**
1. **Directory Move**: `sandbox/` → `agent/sandbox/`
- Update imports: `from sandbox.` → `from agent.sandbox.`
2. **Mandatory Configuration**:
- SystemSettings must have `sandbox.provider_type` configured
- Removed fallback default values
- Configuration must exist in database (from
`conf/system_settings.json`)
3. **Aliyun Credentials**:
- Requires `AGENTRUN_*` environment variables (not `ALIYUN_*`)
- `AGENTRUN_ACCOUNT_ID` is now required (Aliyun primary account ID)
4. **Self-Managed Provider**:
- gVisor (runsc) must be installed for security
- Install: `go install gvisor.dev/gvisor/runsc@latest`
## Database Schema Changes
```python
# SystemSettings.value: CharField → TextField
api/db/db_models.py: Changed for unlimited config length
# SystemSettingsService.get_by_name(): Fixed query precision
api/db/services/system_settings_service.py: startswith → exact match
```
## Files Changed
### Backend (Python)
- `agent/sandbox/providers/base.py` - SandboxProvider ABC interface
- `agent/sandbox/providers/manager.py` - ProviderManager
- `agent/sandbox/providers/self_managed.py` - Self-managed provider
- `agent/sandbox/providers/aliyun_codeinterpreter.py` - Aliyun provider
- `agent/sandbox/providers/e2b.py` - E2B provider (placeholder)
- `agent/sandbox/client.py` - Unified client (enforces DB-only config)
- `agent/tools/code_exec.py` - Updated to use provider system
- `admin/server/services.py` - SandboxMgr with registry & validation
- `admin/server/routes.py` - 5 sandbox API endpoints
- `conf/system_settings.json` - Default: aliyun_codeinterpreter
- `api/db/db_models.py` - TextField for SystemSettings.value
- `api/db/services/system_settings_service.py` - Exact match query
### Frontend (TypeScript/React)
- `web/src/pages/admin/sandbox-settings.tsx` - Settings UI
- `web/src/services/admin-service.ts` - Sandbox service functions
- `web/src/services/admin.service.d.ts` - Type definitions
- `web/src/utils/api.ts` - Sandbox API endpoints
### Documentation
- `docs/develop/sandbox_spec.md` - Architecture spec
- `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration guide
- `agent/sandbox/tests/QUICKSTART.md` - Quick start
- `agent/sandbox/tests/README.md` - Testing guide
### Configuration Scripts
- `scripts/configure_aliyun_sandbox.sh` - Shell script (jq)
- `scripts/configure_aliyun_sandbox.py` - Python script
### Tests
- `agent/sandbox/tests/test_providers.py` - 30 unit tests
- `agent/sandbox/tests/test_aliyun_codeinterpreter.py` - Provider tests
- `agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py` -
Integration tests
- `agent/sandbox/tests/verify_sdk.py` - SDK validation
## Architecture
```
Admin UI → Admin API → SandboxMgr → ProviderManager → [SelfManaged|Aliyun|E2B]
↓
SystemSettings
```
## Usage
### 1. Configure Provider
**Via Admin UI:**
1. Navigate to `/admin/sandbox-settings`
2. Select provider (Aliyun Code Interpreter / Self-Managed)
3. Fill in configuration
4. Click "Test Connection" to verify
5. Click "Save" to apply
**Via Configuration Scripts:**
```bash
# Aliyun provider
export AGENTRUN_ACCESS_KEY_ID="xxx"
export AGENTRUN_ACCESS_KEY_SECRET="yyy"
export AGENTRUN_ACCOUNT_ID="zzz"
export AGENTRUN_REGION="cn-shanghai"
source scripts/configure_aliyun_sandbox.sh
```
### 2. Restart Service
```bash
cd docker
docker compose restart ragflow-server
```
### 3. Execute Code in Agent
```python
from agent.sandbox.client import execute_code
result = execute_code(
code='def main(name: str) -> dict: return {"message": f"Hello {name}!"}',
language="python",
timeout=30,
arguments={"name": "World"}
)
print(result.stdout) # {"message": "Hello World!"}
```
## Troubleshooting
### "Container pool is busy" (Self-Managed)
- **Cause**: Pool exhausted (default: 1 container in `.env`)
- **Fix**: Increase `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE` to 5+
### "Sandbox provider type not configured"
- **Cause**: Database missing configuration
- **Fix**: Run config script or set via Admin UI
### "gVisor not found"
- **Cause**: runsc not installed
- **Fix**: `go install gvisor.dev/gvisor/runsc@latest && sudo cp
~/go/bin/runsc /usr/local/bin/`
### Aliyun authentication errors
- **Cause**: Wrong environment variable names
- **Fix**: Use `AGENTRUN_*` prefix (not `ALIYUN_*`)
## Checklist
- [x] All tests passing (30 unit tests + integration tests)
- [x] Documentation updated (spec, migration guide, quickstart)
- [x] Type definitions added (TypeScript)
- [x] Admin UI implemented
- [x] Configuration validation
- [x] Health checks implemented
- [x] Error handling with structured results
- [x] Breaking changes documented
- [x] Configuration scripts created
- [x] gVisor requirements documented
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-28 13:28:21 +08:00
|
|
|
adminGetSandboxProviderSchema: (providerId: string) =>
|
2026-04-07 15:08:11 +08:00
|
|
|
`${restAPIv1}/admin/sandbox/providers/${providerId}/schema`,
|
|
|
|
|
adminGetSandboxConfig: `${restAPIv1}/admin/sandbox/config`,
|
|
|
|
|
adminSetSandboxConfig: `${restAPIv1}/admin/sandbox/config`,
|
|
|
|
|
adminTestSandboxConnection: `${restAPIv1}/admin/sandbox/test`,
|
2026-04-30 12:36:03 +08:00
|
|
|
|
|
|
|
|
// Skill spaces
|
|
|
|
|
skillSpaces: `${restAPIv1}/skills/spaces`,
|
|
|
|
|
// Builds the URL of a single skill space: `${restAPIv1}/skills/spaces/<spaceId>`,
// i.e. the `skillSpaces` collection path with the id appended.
skillSpace: (spaceId: string) => `${restAPIv1}/skills/spaces/${spaceId}`,
|
|
|
|
|
// NOTE(review): this path uses the singular segment `space`
// (`/skills/space/by-folder`), while `skillSpaces` and `skillSpace` use the
// plural `/skills/spaces`. Confirm this matches the backend route and is not a typo.
skillSpaceByFolder: `${restAPIv1}/skills/space/by-folder`,
|
|
|
|
|
skillConfig: `${restAPIv1}/skills/config`,
|
|
|
|
|
skillSearch: `${restAPIv1}/skills/search`,
|
|
|
|
|
skillIndex: `${restAPIv1}/skills/index`,
|
|
|
|
|
skillReindex: `${restAPIv1}/skills/reindex`,
|
2024-08-01 17:27:27 +08:00
|
|
|
};
|