// // Copyright 2026 The InfiniFlow Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // package tool import ( "context" "encoding/json" "errors" "strings" "testing" ) func TestCodeExec_StubsErrorWhenClientMissing(t *testing.T) { t.Parallel() c := NewCodeExecTool() out, err := c.InvokableRun(context.Background(), `{"language":"python","code":"def main(): return {}"}`) if !errors.Is(err, ErrCodeExecSandboxMissing) { t.Fatalf("err = %v, want ErrCodeExecSandboxMissing", err) } var got codeExecResult if jerr := json.Unmarshal([]byte(out), &got); jerr != nil { t.Fatalf("output is not valid JSON: %v (raw=%s)", jerr, out) } if !got.Stub { t.Errorf("Stub = false, want true") } if !strings.Contains(got.Error, "sandbox") { t.Errorf("Error = %q, want to mention 'sandbox'", got.Error) } } func TestCodeExec_RejectsEmptyCode(t *testing.T) { t.Parallel() c := NewCodeExecTool() _, err := c.InvokableRun(context.Background(), `{"language":"python","code":""}`) if err == nil || !strings.Contains(err.Error(), "code") { t.Fatalf("err = %v, want to mention empty code", err) } } func TestCodeExec_RejectsBadLanguage(t *testing.T) { t.Parallel() c := NewCodeExecTool() _, err := c.InvokableRun(context.Background(), `{"language":"brainfuck","code":"x"}`) if err == nil || !strings.Contains(err.Error(), "language") { t.Fatalf("err = %v, want to reject unsupported language", err) } } func TestCodeExec_AcceptsLangAlias(t *testing.T) { t.Parallel() c := NewCodeExecTool() // Python tool also accepts "lang" as the field name; the Go shell // should still reach the stub branch. _, err := c.InvokableRun(context.Background(), `{"lang":"nodejs","script":"async function main() {}"}`) if !errors.Is(err, ErrCodeExecSandboxMissing) { t.Fatalf("err = %v, want ErrCodeExecSandboxMissing", err) } } func TestCodeExec_Info(t *testing.T) { t.Parallel() c := NewCodeExecTool() info, err := c.Info(context.Background()) if err != nil { t.Fatalf("Info: %v", err) } if info.Name != "execute_code" { t.Errorf("Name = %q, want execute_code", info.Name) } if !strings.Contains(info.Desc, "Python") { t.Errorf("Desc = %q, want to mention Python", info.Desc) } } // TestCodeExec_ResultExtractsArtifacts pins the artifact // collection: SandboxResponse.Metadata["artifacts"] must be // surfaced as `_ARTIFACTS` in the tool's JSON envelope so the // Message // component's artifact markdown formatter can render them. func TestCodeExec_ResultExtractsArtifacts(t *testing.T) { t.Parallel() resp := &SandboxResponse{ Returned: "ok", ExitCode: 0, Metadata: map[string]any{ "artifacts": []any{ map[string]any{"name": "chart.png", "url": "minio://b/chart.png"}, map[string]any{"name": "data.csv", "url": "minio://b/data.csv"}, }, }, } out, err := codeExecResultJSON(resp) if err != nil { t.Fatalf("codeExecResultJSON: %v", err) } var got codeExecResult if jerr := json.Unmarshal([]byte(out), &got); jerr != nil { t.Fatalf("output not valid JSON: %v (raw=%s)", jerr, out) } if len(got.Artifacts) != 2 { t.Fatalf("Artifacts len = %d, want 2", len(got.Artifacts)) } if got.Artifacts[0]["name"] != "chart.png" { t.Errorf("Artifacts[0][name] = %v, want chart.png", got.Artifacts[0]["name"]) } } // TestCodeExec_ResultDropsBadArtifactShape ensures the extractor // silently drops entries that aren't map[string]any rather than // aborting the run. func TestCodeExec_ResultDropsBadArtifactShape(t *testing.T) { t.Parallel() resp := &SandboxResponse{ Returned: "ok", Metadata: map[string]any{ "artifacts": []any{ "just a string", // bad shape map[string]any{"name": "ok.png"}, // good 42, // bad shape }, }, } out, err := codeExecResultJSON(resp) if err != nil { t.Fatalf("codeExecResultJSON: %v", err) } var got codeExecResult if jerr := json.Unmarshal([]byte(out), &got); jerr != nil { t.Fatalf("output not valid JSON: %v", jerr) } if len(got.Artifacts) != 1 { t.Errorf("Artifacts len = %d, want 1 (bad shapes dropped)", len(got.Artifacts)) } if got.Artifacts[0]["name"] != "ok.png" { t.Errorf("Artifacts[0][name] = %v, want ok.png", got.Artifacts[0]["name"]) } } // TestCodeExec_ResultExtractsAttachments pins the attachments // (rendered to downstream Message markdown) path. Distinct from // artifacts so renderers can route them differently. func TestCodeExec_ResultExtractsAttachments(t *testing.T) { t.Parallel() resp := &SandboxResponse{ Returned: "ok", Metadata: map[string]any{ "attachments": []any{ map[string]any{"name": "report.pdf", "url": "minio://b/report.pdf"}, }, }, } out, err := codeExecResultJSON(resp) if err != nil { t.Fatalf("codeExecResultJSON: %v", err) } var got codeExecResult if jerr := json.Unmarshal([]byte(out), &got); jerr != nil { t.Fatalf("output not valid JSON: %v", jerr) } if len(got.Attachments) != 1 { t.Fatalf("Attachments len = %d, want 1", len(got.Attachments)) } } // TestCodeExec_ResultSurfacesActualType pins the actual_type // surface used by Message component to render the right Markdown // formatting (Number → , Object → JSON dump, etc.). func TestCodeExec_ResultSurfacesActualType(t *testing.T) { t.Parallel() resp := &SandboxResponse{ StructuredResult: map[string]any{ "present": true, "value": map[string]any{ "x": float64(1), }, }, } out, err := codeExecResultJSON(resp) if err != nil { t.Fatalf("codeExecResultJSON: %v", err) } var got codeExecResult if jerr := json.Unmarshal([]byte(out), &got); jerr != nil { t.Fatalf("output not valid JSON: %v", jerr) } if got.ActualType != "Object" { t.Errorf("ActualType = %q, want Object", got.ActualType) } if got.Content != "{\n \"x\": 1\n}" { t.Errorf("Content = %q, want pretty JSON object", got.Content) } } func TestCodeExec_ResultUsesStructuredResultValue(t *testing.T) { t.Parallel() resp := &SandboxResponse{ Returned: "8", StructuredResult: map[string]any{ "present": true, "value": float64(8), }, } out, err := codeExecResultJSON(resp) if err != nil { t.Fatalf("codeExecResultJSON: %v", err) } var got map[string]any if jerr := json.Unmarshal([]byte(out), &got); jerr != nil { t.Fatalf("output not valid JSON: %v", jerr) } if got["raw_result"] != float64(8) { t.Fatalf("raw_result = %#v, want 8", got["raw_result"]) } if got["content"] != "8" { t.Fatalf("content = %#v, want \"8\"", got["content"]) } if got["actual_type"] != "Number" { t.Fatalf("actual_type = %#v, want Number", got["actual_type"]) } } func TestCodeExec_ResultFallsBackToStdoutJSON(t *testing.T) { t.Parallel() resp := &SandboxResponse{ Stdout: `{"a":[1,2]}`, } out, err := codeExecResultJSON(resp) if err != nil { t.Fatalf("codeExecResultJSON: %v", err) } var got map[string]any if jerr := json.Unmarshal([]byte(out), &got); jerr != nil { t.Fatalf("output not valid JSON: %v", jerr) } raw, ok := got["raw_result"].(map[string]any) if !ok { t.Fatalf("raw_result type = %T, want map[string]any", got["raw_result"]) } arr, ok := raw["a"].([]any) if !ok || len(arr) != 2 || arr[0] != float64(1) || arr[1] != float64(2) { t.Fatalf("raw_result[a] = %#v, want [1 2]", raw["a"]) } if got["actual_type"] != "Object" { t.Fatalf("actual_type = %#v, want Object", got["actual_type"]) } if got["content"] != "{\n \"a\": [\n 1,\n 2\n ]\n}" { t.Fatalf("content = %#v, want pretty JSON", got["content"]) } } // TestCodeExec_PassesTimeoutToSandbox verifies the new // `timeout` arg flows into the SandboxRequest.Timeout field so // the model can dial per-script budgets. Note: this test // mutates the global sandbox client; it must NOT run in // parallel with the other CodeExec tests that depend on the // default (loud-fail) stub. func TestCodeExec_PassesTimeoutToSandbox(t *testing.T) { var captured SandboxRequest prev := GetSandboxClient() SetSandboxClient(stubSandbox(func(_ context.Context, req SandboxRequest) (*SandboxResponse, error) { captured = req return &SandboxResponse{Returned: "ok", ExitCode: 0}, nil })) t.Cleanup(func() { SetSandboxClient(prev) }) c := NewCodeExecTool() _, err := c.InvokableRun(context.Background(), `{"language":"python","code":"def main(): return {}","timeout":42}`) if err != nil { t.Fatalf("InvokableRun: %v", err) } if captured.Timeout != 42 { t.Errorf("SandboxRequest.Timeout = %d, want 42", captured.Timeout) } } // TestCodeExec_PassesArgumentsToSandbox verifies the `arguments` // arg (Python `**kwargs` to main()) is propagated. Like the // timeout test, this mutates the global sandbox client and must // not run in parallel with sibling CodeExec tests. func TestCodeExec_PassesArgumentsToSandbox(t *testing.T) { var captured SandboxRequest prev := GetSandboxClient() SetSandboxClient(stubSandbox(func(_ context.Context, req SandboxRequest) (*SandboxResponse, error) { captured = req return &SandboxResponse{Returned: "ok", ExitCode: 0}, nil })) t.Cleanup(func() { SetSandboxClient(prev) }) c := NewCodeExecTool() _, err := c.InvokableRun(context.Background(), `{"language":"python","code":"def main(**kw): return kw","arguments":{"x":1,"y":"z"}}`) if err != nil { t.Fatalf("InvokableRun: %v", err) } if captured.Arguments["x"].(float64) != 1 || captured.Arguments["y"].(string) != "z" { t.Errorf("Arguments = %v, want {x:1, y:z}", captured.Arguments) } } // stubSandbox adapts a function literal to the SandboxClient // interface so the timeout / arguments tests can capture the // request without depending on the default stub. type stubSandbox func(ctx context.Context, req SandboxRequest) (*SandboxResponse, error) func (s stubSandbox) ExecuteCode(ctx context.Context, req SandboxRequest) (*SandboxResponse, error) { return s(ctx, req) }