mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Fix: sandbox cannot accept large args list (#14063)
### What problem does this PR solve? Sandbox cannot accept large args list. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -7,7 +7,7 @@ services:
|
||||
runtime: runc
|
||||
privileged: true
|
||||
ports:
|
||||
- "${EXECUTOR_PORT:-9385}:9385"
|
||||
- "${SANDBOX_EXECUTOR_MANAGER_PORT:-9385}:9385"
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
networks:
|
||||
|
||||
@@ -56,33 +56,16 @@ def _extract_result_envelope(stdout: str) -> tuple[str, ExecutionStructuredResul
|
||||
return cleaned_stdout, envelope
|
||||
|
||||
|
||||
async def execute_code(req: CodeExecutionRequest):
|
||||
"""Fully asynchronous execution logic"""
|
||||
language = req.language
|
||||
container = await allocate_container_blocking(language)
|
||||
if not container:
|
||||
return CodeExecutionResult(
|
||||
status=ResultStatus.PROGRAM_RUNNER_ERROR,
|
||||
stdout="",
|
||||
stderr="Container pool is busy",
|
||||
exit_code=-10,
|
||||
detail="no_available_container",
|
||||
)
|
||||
def _build_execution_bundle(req: CodeExecutionRequest, workdir: str) -> dict[str, str | bytes]:
|
||||
arguments = req.arguments or {}
|
||||
args_source = json.dumps(arguments, ensure_ascii=False)
|
||||
args_name = "args.json"
|
||||
code_bytes = base64.b64decode(req.code_b64)
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
workdir = f"/tmp/sandbox_{task_id}"
|
||||
os.makedirs(workdir, mode=0o700, exist_ok=True)
|
||||
|
||||
try:
|
||||
if language == SupportLanguage.PYTHON:
|
||||
code_name = "main.py"
|
||||
code_path = os.path.join(workdir, code_name)
|
||||
with open(code_path, "wb") as f:
|
||||
f.write(base64.b64decode(req.code_b64))
|
||||
runner_name = "runner.py"
|
||||
runner_path = os.path.join(workdir, runner_name)
|
||||
with open(runner_path, "w") as f:
|
||||
f.write(f"""import base64
|
||||
if req.language == SupportLanguage.PYTHON:
|
||||
code_name = "main.py"
|
||||
runner_name = "runner.py"
|
||||
runner_source = f"""import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
@@ -109,25 +92,19 @@ def emit_result(value):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = json.loads(sys.argv[1])
|
||||
with open(os.path.join(os.path.dirname(__file__), "args.json"), encoding="utf-8") as f:
|
||||
args = json.load(f)
|
||||
result = main(**args)
|
||||
emit_result(result)
|
||||
""")
|
||||
|
||||
elif language == SupportLanguage.NODEJS:
|
||||
code_name = "main.js"
|
||||
code_path = os.path.join(workdir, code_name)
|
||||
with open(code_path, "wb") as f:
|
||||
f.write(base64.b64decode(req.code_b64))
|
||||
|
||||
runner_name = "runner.js"
|
||||
runner_path = os.path.join(workdir, "runner.js")
|
||||
with open(runner_path, "w") as f:
|
||||
runner_code = """
|
||||
"""
|
||||
elif req.language == SupportLanguage.NODEJS:
|
||||
code_name = "main.js"
|
||||
runner_name = "runner.js"
|
||||
runner_source = """
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const args = JSON.parse(process.argv[2]);
|
||||
const args = JSON.parse(fs.readFileSync(path.join(__dirname, 'args.json'), 'utf8'));
|
||||
const mainPath = path.join(__dirname, 'main.js');
|
||||
const RESULT_MARKER_PREFIX = '__RESULT_MARKER_PREFIX__';
|
||||
|
||||
@@ -185,12 +162,77 @@ if (fs.existsSync(mainPath)) {
|
||||
process.exit(1);
|
||||
}
|
||||
"""
|
||||
f.write(runner_code.replace("__RESULT_MARKER_PREFIX__", RESULT_MARKER_PREFIX))
|
||||
runner_source = runner_source.replace("__RESULT_MARKER_PREFIX__", RESULT_MARKER_PREFIX)
|
||||
else:
|
||||
assert False, "Will never reach here"
|
||||
|
||||
return {
|
||||
"code_name": code_name,
|
||||
"code_bytes": code_bytes,
|
||||
"runner_name": runner_name,
|
||||
"runner_source": runner_source,
|
||||
"args_name": args_name,
|
||||
"args_source": args_source,
|
||||
}
|
||||
|
||||
|
||||
def _build_container_run_args(language: SupportLanguage, task_id: str, container: str, runner_name: str) -> list[str]:
|
||||
run_args = [
|
||||
"docker",
|
||||
"exec",
|
||||
"--workdir",
|
||||
f"/workspace/{task_id}",
|
||||
container,
|
||||
"timeout",
|
||||
str(TIMEOUT),
|
||||
language,
|
||||
]
|
||||
if language == SupportLanguage.PYTHON:
|
||||
run_args.extend(["-I", "-B"])
|
||||
run_args.append(runner_name)
|
||||
return run_args
|
||||
|
||||
|
||||
async def execute_code(req: CodeExecutionRequest):
|
||||
language = req.language
|
||||
container = await allocate_container_blocking(language)
|
||||
if not container:
|
||||
return CodeExecutionResult(
|
||||
status=ResultStatus.PROGRAM_RUNNER_ERROR,
|
||||
stdout="",
|
||||
stderr="Container pool is busy",
|
||||
exit_code=-10,
|
||||
detail="no_available_container",
|
||||
)
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
workdir = f"/tmp/sandbox_{task_id}"
|
||||
os.makedirs(workdir, mode=0o700, exist_ok=True)
|
||||
|
||||
try:
|
||||
bundle = _build_execution_bundle(req, workdir)
|
||||
code_name = str(bundle["code_name"])
|
||||
runner_name = str(bundle["runner_name"])
|
||||
|
||||
code_path = os.path.join(workdir, code_name)
|
||||
with open(code_path, "wb") as f:
|
||||
f.write(bundle["code_bytes"])
|
||||
|
||||
runner_path = os.path.join(workdir, runner_name)
|
||||
with open(runner_path, "w", encoding="utf-8") as f:
|
||||
f.write(str(bundle["runner_source"]))
|
||||
|
||||
args_path = os.path.join(workdir, str(bundle["args_name"]))
|
||||
with open(args_path, "w", encoding="utf-8") as f:
|
||||
f.write(str(bundle["args_source"]))
|
||||
|
||||
returncode, _, stderr = await async_run_command("docker", "exec", container, "mkdir", "-p", f"/workspace/{task_id}", timeout=5)
|
||||
if returncode != 0:
|
||||
raise RuntimeError(f"Directory creation failed: {stderr}")
|
||||
|
||||
tar_proc = await asyncio.create_subprocess_exec("tar", "czf", "-", "-C", workdir, code_name, runner_name, stdout=asyncio.subprocess.PIPE)
|
||||
tar_proc = await asyncio.create_subprocess_exec(
|
||||
"tar", "czf", "-", "-C", workdir, code_name, runner_name, str(bundle["args_name"]), stdout=asyncio.subprocess.PIPE
|
||||
)
|
||||
tar_stdout, _ = await tar_proc.communicate()
|
||||
|
||||
docker_proc = await asyncio.create_subprocess_exec(
|
||||
@@ -203,25 +245,9 @@ if (fs.existsSync(mainPath)) {
|
||||
|
||||
start_time = time.time()
|
||||
try:
|
||||
logger.info(f"Passed in args: {req.arguments}")
|
||||
args_json = json.dumps(req.arguments or {})
|
||||
run_args = [
|
||||
"docker",
|
||||
"exec",
|
||||
"--workdir",
|
||||
f"/workspace/{task_id}",
|
||||
container,
|
||||
"timeout",
|
||||
str(TIMEOUT),
|
||||
language,
|
||||
]
|
||||
if language == SupportLanguage.PYTHON:
|
||||
run_args.extend(["-I", "-B"])
|
||||
elif language == SupportLanguage.NODEJS:
|
||||
pass # no additional flags
|
||||
else:
|
||||
assert False, "Will never reach here"
|
||||
run_args.extend([runner_name, args_json])
|
||||
arguments = req.arguments or {}
|
||||
logger.info("Passed in args keys=%s size_bytes=%s", list(arguments.keys()), len(json.dumps(arguments, ensure_ascii=False).encode("utf-8")))
|
||||
run_args = _build_container_run_args(language=language, task_id=task_id, container=container, runner_name=runner_name)
|
||||
|
||||
returncode, stdout, stderr = await async_run_command(
|
||||
*run_args,
|
||||
@@ -235,7 +261,6 @@ if (fs.existsSync(mainPath)) {
|
||||
logger.info(f"{returncode=}")
|
||||
logger.info(f"{stdout=}")
|
||||
logger.info(f"{stderr=}")
|
||||
logger.info(f"{args_json=}")
|
||||
|
||||
if returncode == 0:
|
||||
clean_stdout, structured_result = _extract_result_envelope(stdout)
|
||||
|
||||
@@ -1739,8 +1739,9 @@ def execute_code(
|
||||
|
||||
1. **Self-managed provider** ([self_managed.py:164](agent/sandbox/providers/self_managed.py:164)):
|
||||
- Passes arguments via HTTP API: `"arguments": arguments or {}`
|
||||
- executor_manager receives and passes to code via command line
|
||||
- Runner script: `args = json.loads(sys.argv[1])` then `result = main(**args)`
|
||||
- executor_manager writes `args.json` into the per-task workspace
|
||||
- Runner script loads arguments from `args.json`
|
||||
- Python runner calls `main(**args)` and JavaScript runner calls `main(args)`
|
||||
|
||||
2. **Aliyun Code Interpreter** ([aliyun_codeinterpreter.py:260-275](agent/sandbox/providers/aliyun_codeinterpreter.py:260-275)):
|
||||
- Wraps user code to call `main(**arguments)` or `main()` if no arguments
|
||||
|
||||
Reference in New Issue
Block a user