Files
ragflow/agent/sandbox/providers/self_managed.py
Yongteng Lei 3064895bbb Fix: import error in sandbox provider (#13971)
### What problem does this PR solve?

Fix import error in sandbox provider.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **Chores**
* Updated internal configuration import mechanism for sandbox provider
initialization. No end-user impact.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-04-08 15:35:30 +08:00

365 lines
12 KiB
Python

#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Self-managed sandbox provider implementation.
This provider wraps the existing executor_manager HTTP API which manages
a pool of Docker containers with gVisor for secure code execution.
"""
import base64
import time
import uuid
from typing import Dict, Any, List, Optional
import requests
from .base import SandboxProvider, SandboxInstance, ExecutionResult
class SelfManagedProvider(SandboxProvider):
"""
Self-managed sandbox provider using Daytona/Docker.
This provider communicates with the executor_manager HTTP API
which manages a pool of containers for code execution.
"""
def __init__(self):
self.endpoint: str = "http://localhost:9385"
self.timeout: int = 30
self.max_retries: int = 3
self.pool_size: int = 10
self._initialized: bool = False
def initialize(self, config: Dict[str, Any]) -> bool:
"""
Initialize the provider with configuration.
Args:
config: Configuration dictionary with keys:
- endpoint: HTTP endpoint (default: "http://localhost:9385")
- timeout: Request timeout in seconds (default: 30)
- max_retries: Maximum retry attempts (default: 3)
- pool_size: Container pool size for info (default: 10)
Returns:
True if initialization successful, False otherwise
"""
self.endpoint = config.get("endpoint", "http://localhost:9385")
self.timeout = config.get("timeout", 30)
self.max_retries = config.get("max_retries", 3)
self.pool_size = config.get("pool_size", 10)
# Validate endpoint is accessible
if not self.health_check():
# Try to fall back to SANDBOX_HOST from settings if we are using localhost
if "localhost" in self.endpoint or "127.0.0.1" in self.endpoint:
try:
from common import settings
if settings.SANDBOX_HOST and settings.SANDBOX_HOST not in self.endpoint:
original_endpoint = self.endpoint
self.endpoint = f"http://{settings.SANDBOX_HOST}:9385"
if self.health_check():
import logging
logging.warning(f"Sandbox self_managed: Connected using settings.SANDBOX_HOST fallback: {self.endpoint} (original: {original_endpoint})")
self._initialized = True
return True
else:
self.endpoint = original_endpoint # Restore if fallback also fails
except ImportError:
pass
return False
self._initialized = True
return True
def create_instance(self, template: str = "python") -> SandboxInstance:
"""
Create a new sandbox instance.
Note: For self-managed provider, instances are managed internally
by the executor_manager's container pool. This method returns
a logical instance handle.
Args:
template: Programming language (python, nodejs)
Returns:
SandboxInstance object
Raises:
RuntimeError: If instance creation fails
"""
if not self._initialized:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# Normalize language
language = self._normalize_language(template)
# The executor_manager manages instances internally via container pool
# We create a logical instance ID for tracking
instance_id = str(uuid.uuid4())
return SandboxInstance(
instance_id=instance_id,
provider="self_managed",
status="running",
metadata={
"language": language,
"endpoint": self.endpoint,
"pool_size": self.pool_size,
}
)
def execute_code(
self,
instance_id: str,
code: str,
language: str,
timeout: int = 10,
arguments: Optional[Dict[str, Any]] = None
) -> ExecutionResult:
"""
Execute code in the sandbox.
Args:
instance_id: ID of the sandbox instance (not used for self-managed)
code: Source code to execute
language: Programming language (python, nodejs, javascript)
timeout: Maximum execution time in seconds
arguments: Optional arguments dict to pass to main() function
Returns:
ExecutionResult containing stdout, stderr, exit_code, and metadata
Raises:
RuntimeError: If execution fails
TimeoutError: If execution exceeds timeout
"""
if not self._initialized:
raise RuntimeError("Provider not initialized. Call initialize() first.")
# Normalize language
normalized_lang = self._normalize_language(language)
# Prepare request
code_b64 = base64.b64encode(code.encode("utf-8")).decode("utf-8")
payload = {
"code_b64": code_b64,
"language": normalized_lang,
"arguments": arguments or {}
}
url = f"{self.endpoint}/run"
exec_timeout = timeout or self.timeout
start_time = time.time()
try:
response = requests.post(
url,
json=payload,
timeout=exec_timeout,
headers={"Content-Type": "application/json"}
)
execution_time = time.time() - start_time
if response.status_code != 200:
raise RuntimeError(
f"HTTP {response.status_code}: {response.text}"
)
result = response.json()
structured_result = result.get("result") or {}
return ExecutionResult(
stdout=result.get("stdout", ""),
stderr=result.get("stderr", ""),
exit_code=result.get("exit_code", 0),
execution_time=execution_time,
metadata={
"status": result.get("status"),
"time_used_ms": result.get("time_used_ms"),
"memory_used_kb": result.get("memory_used_kb"),
"detail": result.get("detail"),
"instance_id": instance_id,
"artifacts": result.get("artifacts", []),
"result_present": structured_result.get("present", False),
"result_value": structured_result.get("value"),
"result_type": structured_result.get("type"),
}
)
except requests.Timeout:
execution_time = time.time() - start_time
raise TimeoutError(
f"Execution timed out after {exec_timeout} seconds"
)
except requests.RequestException as e:
raise RuntimeError(f"HTTP request failed: {str(e)}")
def destroy_instance(self, instance_id: str) -> bool:
"""
Destroy a sandbox instance.
Note: For self-managed provider, instances are returned to the
internal pool automatically by executor_manager after execution.
This is a no-op for tracking purposes.
Args:
instance_id: ID of the instance to destroy
Returns:
True (always succeeds for self-managed)
"""
# The executor_manager manages container lifecycle internally
# Container is returned to pool after execution
return True
def health_check(self) -> bool:
"""
Check if the provider is healthy and accessible.
Returns:
True if provider is healthy, False otherwise
"""
try:
url = f"{self.endpoint}/healthz"
response = requests.get(url, timeout=5)
return response.status_code == 200
except Exception:
return False
def get_supported_languages(self) -> List[str]:
"""
Get list of supported programming languages.
Returns:
List of language identifiers
"""
return ["python", "nodejs", "javascript"]
@staticmethod
def get_config_schema() -> Dict[str, Dict]:
"""
Return configuration schema for self-managed provider.
Returns:
Dictionary mapping field names to their schema definitions
"""
return {
"endpoint": {
"type": "string",
"required": True,
"label": "Executor Manager Endpoint",
"placeholder": "http://localhost:9385",
"default": "http://localhost:9385",
"description": "HTTP endpoint of the executor_manager service"
},
"timeout": {
"type": "integer",
"required": False,
"label": "Request Timeout (seconds)",
"default": 30,
"min": 5,
"max": 300,
"description": "HTTP request timeout for code execution"
},
"max_retries": {
"type": "integer",
"required": False,
"label": "Max Retries",
"default": 3,
"min": 0,
"max": 10,
"description": "Maximum number of retry attempts for failed requests"
},
"pool_size": {
"type": "integer",
"required": False,
"label": "Container Pool Size",
"default": 10,
"min": 1,
"max": 100,
"description": "Size of the container pool (configured in executor_manager)"
}
}
def _normalize_language(self, language: str) -> str:
"""
Normalize language identifier to executor_manager format.
Args:
language: Language identifier (python, python3, nodejs, javascript)
Returns:
Normalized language identifier
"""
if not language:
return "python"
lang_lower = language.lower()
if lang_lower in ("python", "python3"):
return "python"
elif lang_lower in ("javascript", "nodejs"):
return "nodejs"
else:
return language
def validate_config(self, config: dict) -> tuple[bool, Optional[str]]:
"""
Validate self-managed provider configuration.
Performs custom validation beyond the basic schema validation,
such as checking URL format.
Args:
config: Configuration dictionary to validate
Returns:
Tuple of (is_valid, error_message)
"""
# Validate endpoint URL format
endpoint = config.get("endpoint", "")
if endpoint:
# Check if it's a valid HTTP/HTTPS URL or localhost
import re
url_pattern = r'^(https?://|http://localhost|http://[\d\.]+:[a-z]+:[/]|http://[\w\.]+:)'
if not re.match(url_pattern, endpoint):
return False, f"Invalid endpoint format: {endpoint}. Must start with http:// or https://"
# Validate pool_size is positive
pool_size = config.get("pool_size", 10)
if isinstance(pool_size, int) and pool_size <= 0:
return False, "Pool size must be greater than 0"
# Validate timeout is reasonable
timeout = config.get("timeout", 30)
if isinstance(timeout, int) and (timeout < 1 or timeout > 600):
return False, "Timeout must be between 1 and 600 seconds"
# Validate max_retries
max_retries = config.get("max_retries", 3)
if isinstance(max_retries, int) and (max_retries < 0 or max_retries > 10):
return False, "Max retries must be between 0 and 10"
return True, None