Files
ragflow/internal/agent/component/llm.go
Zhichang Yu 3fa15c0e2f feat(agent): Go port — canvas engine, 22 components, DSL v2, 13 endpoints (#15952)
Ports the agent canvas subsystem from Python to Go.

## What's included

### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages

### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |

### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7

### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)

### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs

### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
2026-06-12 22:58:28 +08:00

491 lines
15 KiB
Go

// Package component — LLM (Phase 2 P0, plan §2.11.3 row 5).
//
// One-shot LLM call. Reads system_prompt + user_prompt, dispatches to a
// chat model, and returns the assistant's content. Stream currently wraps
// Invoke and delivers the complete result as a single message on its
// channel; it does not yet forward incremental chunks.
//
// Model invocation is abstracted behind a small ChatInvoker interface so
// tests can inject a stub without touching the network. The default
// ChatInvoker is built around models.NewEinoChatModel so production paths
// flow through the eino bridge (plan §2.11.6 D1).
package component
import (
"context"
"encoding/json"
"fmt"
"log"
"strings"
"sync"
"github.com/cloudwego/eino/schema"
"ragflow/internal/entity/models"
)
// LLMComponent is a one-shot chat call: each Invoke builds a system/user
// message pair, sends it through the current default ChatInvoker, and
// returns the reply as an output map.
type LLMComponent struct {
	// param holds the construction-time defaults; Invoke overlays the
	// per-call inputs on a copy of it via mergeLLMParam.
	param LLMParam
}
// LLMParam captures the (resolved) DSL parameters for an LLM node. It is the
// component's default configuration; per-call inputs are merged over a copy
// of it by mergeLLMParam.
type LLMParam struct {
	// ModelID is the model identifier, forwarded to the invoker as
	// ChatInvokeRequest.ModelName. Invoke rejects an empty value.
	ModelID string
	// SystemPrompt is the text of the system-role message; no system
	// message is sent when it is empty.
	SystemPrompt string
	// UserPrompt is the text of the user-role message. When empty, Invoke
	// reuses SystemPrompt as the user message.
	UserPrompt string
	// Temperature is the optional sampling temperature, forwarded as-is to
	// the invoker; nil when not supplied.
	Temperature *float64
	// MaxTokens is the optional generation cap, forwarded as-is to the
	// invoker; nil when not supplied.
	MaxTokens *int
	// JSONOutput makes Invoke additionally try to parse the reply content
	// as JSON and publish it under the "json" output key.
	JSONOutput bool
	// Driver is the provider driver to use (e.g. "openai", "dummy"). When
	// empty, the default einoChatInvoker falls back to the "dummy" driver.
	Driver string
	// APIKey overrides the default empty key. Tests may set this; prod
	// reads it from env / secret store at higher layers.
	APIKey string
	// BaseURL overrides the driver default endpoint (e.g. to point the
	// "openai" driver at a third-party gateway). Empty defers to the
	// driver's built-in default URL.
	BaseURL string
}
// LLMInput is the resolved input map the factory / Invoke expects, expressed
// as a struct. Its fields mirror LLMParam except that it has no BaseURL.
//
// NOTE(review): nothing in this file references LLMInput — Invoke takes a
// map[string]any and merges it into LLMParam. Confirm whether other files
// use it and whether it should also carry BaseURL.
type LLMInput struct {
	// ModelID is the model identifier ("model_id" input).
	ModelID string
	// SystemPrompt is the system-role text ("system_prompt" input).
	SystemPrompt string
	// UserPrompt is the user-role text ("user_prompt" input).
	UserPrompt string
	// Temperature is the optional sampling temperature; nil when absent.
	Temperature *float64
	// MaxTokens is the optional generation cap; nil when absent.
	MaxTokens *int
	// JSONOutput requests JSON parsing of the reply content.
	JSONOutput bool
	// Driver is the provider driver name.
	Driver string
	// APIKey is the per-call API key override.
	APIKey string
}
// LLMOutput mirrors the outputs map (per plan §2.11.3 row 5):
//
//	"content" string, "model" string, "stopped" bool, "tokens" int
//
// JSONOutput=true additionally populates "json" (map[string]any) when the
// content parses as a JSON object; that key has no counterpart field here.
//
// NOTE(review): LLMComponent.Invoke in this file returns a map[string]any
// rather than this struct — confirm where LLMOutput is consumed.
type LLMOutput struct {
	// Content is the assistant's reply text ("content").
	Content string
	// Model is the model identifier echoed back ("model").
	Model string
	// Stopped reports whether generation finished ("stopped").
	Stopped bool
	// Tokens is the reported token count ("tokens").
	Tokens int
}
// ChatInvoker is the abstraction the LLM component uses to talk to a
// chat model. The default implementation (einoChatInvoker) lives in this
// file; tests can replace the package-level default through
// SetDefaultChatInvoker to inject a stub.
type ChatInvoker interface {
	// Invoke performs one chat call described by req and returns the
	// model's reply, or an error if the call could not be completed.
	Invoke(ctx context.Context, req ChatInvokeRequest) (*ChatInvokeResponse, error)
}
// ChatInvokeRequest is the minimal surface the LLM component needs to
// dispatch a chat call. Driver / APIKey / ModelName are kept here so the
// invoker can wire the right provider without the component caring.
type ChatInvokeRequest struct {
	// Driver names the provider driver; the default invoker treats an
	// empty value as "dummy".
	Driver string
	// ModelName is the model identifier; the default invoker rejects an
	// empty value.
	ModelName string
	// APIKey is the credential handed to the model's API configuration.
	APIKey string
	// BaseURL, when non-empty, replaces the driver's default endpoint.
	BaseURL string
	// Messages is the conversation to send, in order.
	Messages []schema.Message
	// Temperature is the optional sampling temperature; nil when unset.
	Temperature *float64
	// MaxTokens is the optional generation cap; nil when unset.
	MaxTokens *int
}
// ChatInvokeResponse mirrors what the LLM component writes to its outputs:
// each field is copied to the output key of the same (lower-cased) name.
type ChatInvokeResponse struct {
	// Content is the assistant's reply text.
	Content string
	// Model is the model identifier the reply is attributed to.
	Model string
	// Stopped reports whether generation finished; the default invoker
	// always sets it to true.
	Stopped bool
	// Tokens is the token count; the default invoker always reports 0.
	Tokens int
}
// Package-level ChatInvoker state shared by every LLMComponent.
var (
	// defaultChatInvokerMu guards defaultChatInvoker so tests can swap the
	// invoker while components are reading it.
	defaultChatInvokerMu sync.RWMutex

	// defaultChatInvoker is the invoker used by LLMComponent.Invoke. It
	// starts as the production eino-backed implementation and is replaced
	// in tests through SetDefaultChatInvoker.
	defaultChatInvoker ChatInvoker = &einoChatInvoker{}
)
// SetDefaultChatInvoker replaces the package-level ChatInvoker. It exists as
// a test helper: pass a stub to intercept model calls, or nil to fall back
// to the production invoker again (getDefaultChatInvoker substitutes one
// when it finds nil). Safe for concurrent use.
func SetDefaultChatInvoker(inv ChatInvoker) {
	defaultChatInvokerMu.Lock()
	defaultChatInvoker = inv
	defaultChatInvokerMu.Unlock()
}
// getDefaultChatInvoker returns the ChatInvoker currently installed as the
// package default. If the default has been cleared (set to nil), a fresh
// production einoChatInvoker is returned instead, so callers never see nil.
func getDefaultChatInvoker() ChatInvoker {
	defaultChatInvokerMu.RLock()
	current := defaultChatInvoker
	defaultChatInvokerMu.RUnlock()

	if current != nil {
		return current
	}
	return &einoChatInvoker{}
}
// einoChatInvoker is the production ChatInvoker — it constructs a fresh
// models.EinoChatModel per call from the request and dispatches. It carries
// no state of its own, so the zero value is ready to use.
type einoChatInvoker struct{}
// Invoke satisfies ChatInvoker. It resolves a model driver for the request,
// wraps it in the eino chat-model bridge, and performs one blocking
// generation.
//
// An empty req.Driver selects the "dummy" driver. An error is returned when
// req.ModelName is empty, when the driver cannot be resolved (or resolves to
// nil), when the model call fails (that error is returned unwrapped), or
// when the model call yields no message. The response always reports
// Stopped=true and Tokens=0: neither value is read back from the driver.
func (e *einoChatInvoker) Invoke(ctx context.Context, req ChatInvokeRequest) (*ChatInvokeResponse, error) {
	if req.ModelName == "" {
		return nil, fmt.Errorf("component: LLM: model_id is required")
	}
	driver := req.Driver
	if driver == "" {
		driver = "dummy"
	}
	// baseURL: drivers consult map["default"] as the canonical endpoint
	// (see internal/entity/models/base_model.go:GetBaseURL). When the
	// caller did not override, leave the driver default in place by
	// passing nil — every driver seeds its own map at construction time.
	var baseURL map[string]string
	if req.BaseURL != "" {
		baseURL = map[string]string{"default": req.BaseURL}
	}
	// urlSuffix: each driver appends URLSuffix.Chat to baseURL to form
	// the chat-completions endpoint (e.g. "chat/completions" for
	// openai-compatible drivers, "v1/messages" for anthropic). The
	// factory's NewModelDriver accepts a zero URLSuffix and stores it
	// as-is; the openai driver then builds `<base>/` (with no path),
	// which is the wrong endpoint for a v1-root base URL. We seed
	// the right suffix per driver here so the factory and the
	// openai driver's URL construction agree.
	urlSuffix := chatURLSuffixFor(driver)
	d, err := models.NewModelFactory().CreateModelDriver(driver, baseURL, urlSuffix)
	if err != nil {
		return nil, fmt.Errorf("component: LLM: resolve driver %q: %w", driver, err)
	}
	if d == nil {
		return nil, fmt.Errorf("component: LLM: no driver for %q", driver)
	}
	apiKey := req.APIKey
	cfg := &models.APIConfig{ApiKey: &apiKey}
	cm := models.NewChatModel(d, &req.ModelName, cfg)
	chatCfg := &models.ChatConfig{
		Temperature: req.Temperature,
		MaxTokens:   req.MaxTokens,
	}
	wrapper := models.NewEinoChatModel(cm, chatCfg)
	out, err := wrapper.Generate(ctx, toEinoMessages(req.Messages))
	if err != nil {
		return nil, err
	}
	// Guard against a (nil, nil) result: dereferencing it would panic, and
	// Stream runs this call on a goroutine with no recover.
	if out == nil {
		return nil, fmt.Errorf("component: LLM: model %q returned no message", req.ModelName)
	}
	return &ChatInvokeResponse{
		Content: out.Content,
		Model:   req.ModelName,
		Stopped: true,
		Tokens:  0,
	}, nil
}
// toEinoMessages turns the component's message values into the pointer
// slice the eino chat model takes. Only Role and Content are carried over;
// a message with an empty role is sent as a user message. An empty input
// yields nil.
func toEinoMessages(msgs []schema.Message) []*schema.Message {
	n := len(msgs)
	if n == 0 {
		return nil
	}
	converted := make([]*schema.Message, n)
	for idx, src := range msgs {
		dst := &schema.Message{Role: src.Role, Content: src.Content}
		if dst.Role == "" {
			dst.Role = schema.User
		}
		converted[idx] = dst
	}
	return converted
}
// chatURLSuffixFor picks the chat-endpoint path the factory should hand to
// the named driver. Drivers build `baseURL/URLSuffix.Chat`, so the suffix
// must match the provider's real chat path. The driver name is compared
// case-insensitively: "anthropic" gets "v1/messages"; every other driver,
// including the dummy driver and openai-compatible gateways, gets the
// openai-style "chat/completions".
func chatURLSuffixFor(driver string) models.URLSuffix {
	if strings.ToLower(driver) == "anthropic" {
		return models.URLSuffix{Chat: "v1/messages"}
	}
	return models.URLSuffix{Chat: "chat/completions"}
}
// NewLLMComponent returns an LLMComponent whose default parameters are p.
// No validation happens here; required fields are checked by Invoke.
func NewLLMComponent(p LLMParam) *LLMComponent {
	c := new(LLMComponent)
	c.param = p
	return c
}
// Name reports the name under which this component is registered ("LLM").
func (c *LLMComponent) Name() string {
	const registeredName = "LLM"
	return registeredName
}
// Invoke runs the LLM once and returns the output map.
//
// inputs are layered over the component's construction-time parameters by
// mergeLLMParam. The call fails with a *ParamError when no model id is set
// or when both prompts are empty; invoker failures are wrapped with the
// "component: LLM.Invoke" prefix.
//
// The returned map always holds "content", "model", "stopped" and "tokens".
// With json_output enabled it additionally holds "json", but only when the
// content decodes to a JSON object. Anything else (invalid JSON, a
// non-object value, or the literal null) is logged and otherwise ignored, so
// the caller can still read "content".
func (c *LLMComponent) Invoke(ctx context.Context, inputs map[string]any) (map[string]any, error) {
	p := mergeLLMParam(c.param, inputs)
	if p.ModelID == "" {
		return nil, &ParamError{Field: "model_id", Reason: "required"}
	}
	if p.UserPrompt == "" && p.SystemPrompt == "" {
		return nil, &ParamError{Field: "user_prompt", Reason: "at least one of user_prompt or system_prompt must be set"}
	}
	// The Anthropic driver (and the openai chat-completions driver
	// when the system role is dropped) reject a system-only message
	// list with "messages is empty" / 400. v1 fixtures frequently
	// ship only a system prompt; fall back to using the system text
	// as the user message so the call still goes through. The
	// answer text in that case is the model continuing the
	// instruction in its reply slot, which is what the v1 fixtures
	// also expect.
	if p.UserPrompt == "" {
		p.UserPrompt = p.SystemPrompt
	}
	msgs := buildMessages(p.SystemPrompt, p.UserPrompt)
	inv := getDefaultChatInvoker()
	resp, err := inv.Invoke(ctx, ChatInvokeRequest{
		Driver:      p.Driver,
		ModelName:   p.ModelID,
		APIKey:      p.APIKey,
		BaseURL:     p.BaseURL,
		Messages:    msgs,
		Temperature: p.Temperature,
		MaxTokens:   p.MaxTokens,
	})
	if err != nil {
		return nil, fmt.Errorf("component: LLM.Invoke: %w", err)
	}
	out := map[string]any{
		"content": resp.Content,
		"model":   resp.Model,
		"stopped": resp.Stopped,
		"tokens":  resp.Tokens,
	}
	if p.JSONOutput {
		var parsed map[string]any
		switch err := json.Unmarshal([]byte(resp.Content), &parsed); {
		case err != nil:
			// Surface a non-fatal warning — caller can still read "content".
			log.Printf("component: LLM: json_output=true but content is not valid JSON: %v", err)
		case parsed == nil:
			// The literal `null` decodes without error but leaves the map
			// nil; it is not a JSON object, so do not publish a nil map.
			log.Printf("component: LLM: json_output=true but content is JSON null, not an object")
		default:
			out["json"] = parsed
		}
	}
	return out, nil
}
// Stream implements Component.Stream on top of Invoke. It starts a goroutine
// that runs Invoke once and sends exactly one message on the returned
// channel: the Invoke output map on success, or {"error": <message>} on
// failure. The channel is then closed. The channel has capacity one, so the
// goroutine finishes even if nobody receives. The returned error is always
// nil; failures are reported in-band.
func (c *LLMComponent) Stream(ctx context.Context, inputs map[string]any) (<-chan map[string]any, error) {
	ch := make(chan map[string]any, 1)
	go func() {
		defer close(ch)
		payload, err := c.Invoke(ctx, inputs)
		if err != nil {
			payload = map[string]any{"error": err.Error()}
		}
		ch <- payload
	}()
	return ch, nil
}
// Inputs returns parameter metadata for tooling: a map from each input key
// the component reads to a human-readable description. It lists every v2
// key handled by mergeLLMParam, including "base_url"; the v1 aliases
// ("llm_id", "sys_prompt") are accepted at runtime but not advertised here.
func (c *LLMComponent) Inputs() map[string]string {
	return map[string]string{
		"model_id":      "Provider-side model identifier (e.g. \"gpt-4o-mini\")",
		"system_prompt": "Optional system prompt prepended to the conversation",
		"user_prompt":   "User prompt; supports {{cpn_id@param}} references resolved by the canvas engine",
		"temperature":   "Sampling temperature (0.0-2.0). Optional.",
		"max_tokens":    "Maximum tokens to generate. Optional.",
		"json_output":   "If true, attempt to JSON-parse \"content\" into \"json\" output key.",
		"driver":        "Provider driver name (openai, anthropic, …). Defaults to \"dummy\".",
		"api_key":       "Override API key for this call. Empty defers to env.",
		"base_url":      "Override the driver's default endpoint base URL. Empty defers to the driver default.",
	}
}
// Outputs returns output metadata: a map from each key Invoke may place in
// its result to a human-readable description.
func (c *LLMComponent) Outputs() map[string]string {
	docs := make(map[string]string, 5)
	docs["content"] = "Assistant text response"
	docs["model"] = "Model identifier echoed back (sanity check)"
	docs["stopped"] = "True if the model finished naturally"
	docs["tokens"] = "Reported token count (0 when not reported by the driver)"
	docs["json"] = "When json_output=true and content parses as a JSON object, the parsed map"
	return docs
}
// buildMessages produces the conversation sent to the model: the system
// message followed by the user message, skipping whichever of the two has
// empty text. The result is never nil but may be empty.
func buildMessages(system, user string) []schema.Message {
	candidates := [...]schema.Message{
		{Role: schema.System, Content: system},
		{Role: schema.User, Content: user},
	}
	msgs := make([]schema.Message, 0, len(candidates))
	for _, m := range candidates {
		if m.Content != "" {
			msgs = append(msgs, m)
		}
	}
	return msgs
}
// mergeLLMParam returns a copy of base with every recognised, correctly
// typed entry of inputs applied on top. Keys that are absent or hold a value
// of the wrong type leave the corresponding field untouched.
//
// Two v1 DSL aliases are accepted as fallbacks for the v2 names:
//
//	"llm_id"     → "model_id"
//	"sys_prompt" → "system_prompt"
//
// The v2 key wins whenever it holds a string. The v1 fixtures in
// internal/agent/dsl/testdata/v1_examples use the short forms; without these
// aliases the v1→v2 conversion (plan §2.5) would have to run before the
// factory builds the component, which the e2e compile+invoke path doesn't
// do. "base_url" maps onto the BaseURL field.
func mergeLLMParam(base LLMParam, inputs map[string]any) LLMParam {
	merged := base

	// firstString reports the value of the first key in keys that holds a
	// string, so the v2 name can be listed ahead of its v1 alias.
	firstString := func(keys ...string) (string, bool) {
		for _, key := range keys {
			if s, ok := stringFrom(inputs, key); ok {
				return s, true
			}
		}
		return "", false
	}

	if id, ok := firstString("model_id", "llm_id"); ok {
		merged.ModelID = id
	}
	if sys, ok := firstString("system_prompt", "sys_prompt"); ok {
		merged.SystemPrompt = sys
	}

	// Plain string fields with a single accepted key each.
	plain := []struct {
		key string
		dst *string
	}{
		{"user_prompt", &merged.UserPrompt},
		{"driver", &merged.Driver},
		{"api_key", &merged.APIKey},
		{"base_url", &merged.BaseURL},
	}
	for _, field := range plain {
		if s, ok := stringFrom(inputs, field.key); ok {
			*field.dst = s
		}
	}

	if wantJSON, ok := boolFrom(inputs, "json_output"); ok {
		merged.JSONOutput = wantJSON
	}
	// Each override gets its own variable so the stored pointer never
	// aliases base's value.
	if temp, ok := floatFrom(inputs, "temperature"); ok {
		merged.Temperature = &temp
	}
	if limit, ok := intFrom(inputs, "max_tokens"); ok {
		merged.MaxTokens = &limit
	}
	return merged
}
// stringFrom returns inputs[name] when it is present and holds a string.
// A missing key or a value of any other type (including fmt.Stringer
// implementations) yields ("", false).
func stringFrom(inputs map[string]any, name string) (string, bool) {
	s, isString := inputs[name].(string)
	return s, isString
}
// boolFrom returns inputs[name] when it is present and holds a bool.
// A missing key or a value of any other type yields (false, false).
func boolFrom(inputs map[string]any, name string) (bool, bool) {
	b, isBool := inputs[name].(bool)
	return b, isBool
}
// floatFrom returns inputs[name] as a float64. It accepts float64, float32,
// int and int64 values; a missing key or any other type yields (0, false).
func floatFrom(inputs map[string]any, name string) (float64, bool) {
	switch num := inputs[name].(type) {
	case float64:
		return num, true
	case float32:
		return float64(num), true
	case int:
		return float64(num), true
	case int64:
		return float64(num), true
	default:
		return 0, false
	}
}
// intFrom returns inputs[name] as an int. It accepts int, int64 and float64
// values (JSON-decoded numbers arrive as float64); a float64 is truncated
// toward zero. A missing key or any other type yields (0, false).
//
// Values that cannot be represented as an int are rejected rather than
// converted: NaN, ±Inf, float64 values outside the int range, and int64
// values that would overflow a 32-bit int. Go leaves the result of such
// conversions implementation-defined, so reporting success would hand the
// caller an arbitrary number.
func intFrom(inputs map[string]any, name string) (int, bool) {
	const (
		maxInt = int(^uint(0) >> 1)
		minInt = -maxInt - 1
	)
	switch num := inputs[name].(type) {
	case int:
		return num, true
	case int64:
		if num < int64(minInt) || num > int64(maxInt) {
			return 0, false
		}
		return int(num), true
	case float64:
		// float64(minInt) is exactly -2^(bits-1) and its negation is the
		// first value past maxInt, so this is an exact range test. NaN
		// fails every comparison and is therefore rejected as well.
		if !(num >= float64(minInt) && num < -float64(minInt)) {
			return 0, false
		}
		return int(num), true
	}
	return 0, false
}
// init registers the "LLM" factory with the orchestrator-owned registry.
// The factory reads the same keys, including the v1 aliases "llm_id" and
// "sys_prompt", that mergeLLMParam accepts at call time, and builds the
// component's default parameters from them. Keys that are missing or hold a
// value of the wrong type leave the field at its zero value. The factory
// never returns an error.
func init() {
	Register("LLM", func(params map[string]any) (Component, error) {
		// firstString yields the first of keys that holds a string, or ""
		// when none does, which is the zero value the field would keep.
		firstString := func(keys ...string) string {
			for _, key := range keys {
				if s, ok := stringFrom(params, key); ok {
					return s
				}
			}
			return ""
		}

		cfg := LLMParam{
			ModelID:      firstString("model_id", "llm_id"),
			SystemPrompt: firstString("system_prompt", "sys_prompt"),
			UserPrompt:   firstString("user_prompt"),
			Driver:       firstString("driver"),
			APIKey:       firstString("api_key"),
			BaseURL:      firstString("base_url"),
		}
		if wantJSON, ok := boolFrom(params, "json_output"); ok {
			cfg.JSONOutput = wantJSON
		}
		if temp, ok := floatFrom(params, "temperature"); ok {
			cfg.Temperature = &temp
		}
		if limit, ok := intFrom(params, "max_tokens"); ok {
			cfg.MaxTokens = &limit
		}
		return NewLLMComponent(cfg), nil
	})
}