internal/agent/tool/code_exec.go

//
//  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
//  Licensed under the Apache License, Version 2.0 (the "License");
//  you may not use this file except in compliance with the License.
//  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//

package tool

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"strings"

	"github.com/cloudwego/eino/components/tool"
	"github.com/cloudwego/eino/schema"
)

// ErrCodeExecSandboxMissing is returned when no sandbox client is
// registered. The Python sandbox itself is kept as-is (the Go
// side never reimplemented the sandbox). When a client is
// registered via SetSandboxClient at boot, the tool dispatches
// the execution.
var ErrCodeExecSandboxMissing = errors.New(
	"CodeExec sandbox client not registered — call SetSandboxClient at boot",
)

const codeExecToolName = "execute_code"

const codeExecToolDescription = "This tool has a sandbox that can execute code written in 'Python'/'Javascript'. " +
	"It receives a piece of code and returns a JSON string."

// codeExecArgs is the JSON shape the model sends in. The Python
// tool accepts "lang" + "script"; we also accept "code" as a
// synonym since some DSLs and tests use that spelling.
type codeExecArgs struct {
	Language string         `json:"language,omitempty"`
	Lang     string         `json:"lang,omitempty"`
	Script   string         `json:"script,omitempty"`
	Code     string         `json:"code,omitempty"`
	Args     map[string]any `json:"arguments,omitempty"`
	// Timeout is the per-execution wall-clock budget in seconds. 0
	// (the default) defers to the sandbox provider's own default
	// (typically 30s). Mirrors Python's
	// `code_exec.py:358 timeout_seconds = int(os.environ.get(...))`
	// but the value flows per-call rather than per-process, which
	// lets the model dial up/down for known-fast vs. known-slow
	// scripts.
	Timeout int `json:"timeout,omitempty"`
}

// codeExecResult is the JSON envelope returned to the model. The output
// shape mirrors the Python tool's `content` / `_ERROR` / `actual_type`
// fields so downstream nodes can pattern-match unchanged. Artifacts and
// Attachments are surfaced for the model and downstream component
// consumption (e.g. Message component's artifact markdown formatter).
type codeExecResult struct {
	Content     string           `json:"content,omitempty"`
	ActualType  string           `json:"actual_type,omitempty"`
	Stub        bool             `json:"stub,omitempty"`
	Error       string           `json:"_ERROR,omitempty"`
	ExitCode    int              `json:"exit_code,omitempty"`
	Stdout      string           `json:"stdout,omitempty"`
	Stderr      string           `json:"stderr,omitempty"`
	Artifacts   []map[string]any `json:"_ARTIFACTS,omitempty"`
	Attachments []map[string]any `json:"attachments,omitempty"`
}

// CodeExecTool is the  for the CodeExec tool
// ( . It validates language +
// non-empty code and returns a structured "not-yet-wired" error.
type CodeExecTool struct{}

// NewCodeExecTool returns a CodeExecTool implementing eino's
// tool.InvokableTool interface.
func NewCodeExecTool() *CodeExecTool {
	return &CodeExecTool{}
}

// Info returns the tool's metadata for the chat model. The schema mirrors
// the Python CodeExecParam ToolMeta (plan , 字段对齐).
func (c *CodeExecTool) Info(_ context.Context) (*schema.ToolInfo, error) {
	return &schema.ToolInfo{
		Name: codeExecToolName,
		Desc: codeExecToolDescription,
		ParamsOneOf: schema.NewParamsOneOfByParams(map[string]*schema.ParameterInfo{
			"language": {
				Type:     schema.String,
				Desc:     "The programming language of the code. Allowed: 'python' (or 'python3'), 'javascript' (or 'nodejs').",
				Enum:     []string{"python", "python3", "javascript", "nodejs"},
				Required: true,
			},
			"code": {
				Type:     schema.String,
				Desc:     "The code to execute. Must define a `main` function (Python) or export `main` (JavaScript).",
				Required: true,
			},
		}),
	}, nil
}

// InvokableRun validates the inputs and dispatches to the
// registered sandbox client via SetSandboxClient.
func (c *CodeExecTool) InvokableRun(ctx context.Context, argumentsInJSON string, _ ...tool.Option) (string, error) {
	var args codeExecArgs
	if argumentsInJSON == "" {
		return codeExecStubResult("arguments are required"), errors.New("code_exec: empty arguments")
	}
	if err := json.Unmarshal([]byte(argumentsInJSON), &args); err != nil {
		return codeExecStubResult("invalid JSON: " + err.Error()),
			fmt.Errorf("code_exec: parse arguments: %w", err)
	}

	lang := normalizeCodeExecLang(args.Language, args.Lang)
	if lang == "" {
		return codeExecStubResult("unsupported language: must be 'python'/'python3'/'javascript'/'nodejs'"),
			errors.New("code_exec: invalid language")
	}

	script := args.Script
	if script == "" {
		script = args.Code
	}
	if strings.TrimSpace(script) == "" {
		return codeExecStubResult("code is required"), errors.New("code_exec: empty code")
	}

	// Dispatch to the registered SandboxClient. When the default
	// stub is in place, the call surfaces
	// ErrCodeExecSandboxMissing; once a real client is
	// installed via SetSandboxClient at boot, the script runs.
	client := GetSandboxClient()
	resp, err := client.ExecuteCode(ctx, SandboxRequest{
		Lang:      lang,
		Script:    script,
		Arguments: args.Args,
		Timeout:   args.Timeout,
	})
	if err != nil {
		return codeExecStubResult(err.Error()), err
	}
	out, mErr := codeExecResultJSON(resp)
	if mErr != nil {
		return codeExecStubResult(mErr.Error()), mErr
	}
	return out, nil
}

// codeExecResultJSON serializes a SandboxResponse into the envelope
// the eino tool contract returns. Field mapping mirrors the Python
// tool's `code_exec.py:385-490` `_process_execution_result`:
//
//   - Stdout / Stderr / ExitCode: stream directly through.
//   - Returned → Content (the model's natural "what did main() give
//     us back" field).
//   - StructuredResult["actual_type"] → ActualType (Python
//     `infer_actual_type` surface for downstream Message component).
//   - Metadata["artifacts"] → Artifacts (the model AND the Message
//     component's `_ARTIFACTS` collector both consume this; we
//     surface it as `_ARTIFACTS` to match the Python envelope).
//   - Metadata["attachments"] → Attachments (rendered into
//     downstream Markdown by Message via the same path the Agent
//     tool artifact markdown uses).
//
// Artifacts / Attachments with the wrong element type (anything
// other than map[string]any) are silently dropped with a log
// warning. This matches the Python tool's "skip on shape mismatch"
// semantics — better to lose one artifact than to abort the run.
func codeExecResultJSON(r *SandboxResponse) (string, error) {
	if r == nil {
		return codeExecStubResult("empty response"), nil
	}
	out := codeExecResult{
		Content:  r.Returned,
		ExitCode: r.ExitCode,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
	}
	if r.StructuredResult != nil {
		if v, ok := r.StructuredResult["actual_type"].(string); ok {
			out.ActualType = v
		}
	}
	if r.Metadata != nil {
		out.Artifacts = extractArtifactList(r.Metadata, "artifacts")
		out.Attachments = extractArtifactList(r.Metadata, "attachments")
	}
	b, err := json.Marshal(out)
	if err != nil {
		return "", fmt.Errorf("code_exec: marshal result: %w", err)
	}
	return string(b), nil
}

// extractArtifactList pulls a list of dict-shaped entries out of
// Metadata[key]. Items that aren't map[string]any are dropped with
// a stderr log line so the operator can see the data loss without
// the run aborting.
func extractArtifactList(meta map[string]any, key string) []map[string]any {
	raw, ok := meta[key]
	if !ok {
		return nil
	}
	arr, ok := raw.([]any)
	if !ok {
		return nil
	}
	out := make([]map[string]any, 0, len(arr))
	for i, item := range arr {
		m, ok := item.(map[string]any)
		if !ok {
			fmt.Fprintf(os.Stderr, "code_exec: %s[%d] is %T, expected map[string]any; dropping\n", key, i, item)
			continue
		}
		out = append(out, m)
	}
	return out
}

func codeExecStubResult(msg string) string {
	b, err := json.Marshal(codeExecResult{
		Stub:  true,
		Error: msg,
	})
	if err != nil {
		return fmt.Sprintf(`{"_ERROR":"code_exec: marshal stub: %s","stub":true}`, err)
	}
	return string(b)
}

// normalizeCodeExecLang accepts the model's literal "language" or the
// Python-style "lang" alias and maps synonyms to the canonical "python" /
// "nodejs" forms used by the Python sandbox.
func normalizeCodeExecLang(primary, alias string) string {
	v := strings.ToLower(strings.TrimSpace(primary))
	if v == "" {
		v = strings.ToLower(strings.TrimSpace(alias))
	}
	switch v {
	case "python", "python3":
		return "python"
	case "javascript", "js", "nodejs", "node":
		return "nodejs"
	}
	return ""
}