mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Ports the agent canvas subsystem from Python to Go.
## What's included
### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages
### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |
### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7
### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)
### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs
### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
491 lines
15 KiB
Go
491 lines
15 KiB
Go
// Package component — LLM (Phase 2 P0, plan §2.11.3 row 5).
|
|
//
|
|
// One-shot LLM call. Reads system_prompt + user_prompt, dispatches to a
|
|
// chat model, and returns the assistant's content. Streaming variant
|
|
// forwards incremental chunks via Stream.
|
|
//
|
|
// Model invocation is abstracted behind a small ChatInvoker interface so
|
|
// tests can inject a stub without touching the network. The default
|
|
// ChatInvoker is built around models.NewEinoChatModel so production paths
|
|
// flow through the eino bridge (plan §2.11.6 D1).
|
|
package component
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/cloudwego/eino/schema"
|
|
|
|
"ragflow/internal/entity/models"
|
|
)
|
|
|
|
// LLMComponent is a one-shot chat call.
|
|
type LLMComponent struct {
|
|
param LLMParam
|
|
}
|
|
|
|
// LLMParam is the resolved parameter set of one LLM node.
//
// Pointer-typed fields (Temperature, MaxTokens) are nil when the DSL did not
// set them, so "unset" stays distinguishable from a zero value.
type LLMParam struct {
	// ModelID is the provider-side model identifier; Invoke rejects an
	// empty value.
	ModelID string
	// SystemPrompt and UserPrompt are the two message bodies. At least one
	// must be non-empty for Invoke to proceed.
	SystemPrompt string
	UserPrompt   string
	// Temperature and MaxTokens are forwarded to the chat invoker untouched.
	Temperature *float64
	MaxTokens   *int
	// JSONOutput asks Invoke to additionally JSON-decode the reply into the
	// "json" output key.
	JSONOutput bool

	// Driver names the provider driver (e.g. "openai", "dummy"). When it is
	// empty the default ChatInvoker in this file falls back to "dummy".
	Driver string

	// APIKey is the key handed to the driver. It is passed through as-is,
	// including when empty.
	// NOTE(review): the original comment says production reads the key from
	// env / a secret store at a higher layer — confirm against the callers.
	APIKey string

	// BaseURL replaces the driver's default endpoint (for example to point
	// the "openai" driver at a third-party gateway). Empty leaves the
	// driver's own default in place.
	BaseURL string
}
|
|
|
|
// LLMInput is a typed view of the resolved inputs of an LLM node. It carries
// the same fields as LLMParam except BaseURL.
//
// NOTE(review): nothing in the visible part of this file references this
// type; Invoke works on map[string]any instead — confirm whether external
// callers rely on it.
type LLMInput struct {
	// Model selection and prompts.
	ModelID      string
	SystemPrompt string
	UserPrompt   string

	// Optional sampling controls; nil means "not set".
	Temperature *float64
	MaxTokens   *int

	// JSONOutput mirrors the "json_output" input key.
	JSONOutput bool

	// Provider wiring.
	Driver string
	APIKey string
}
|
|
|
|
// LLMOutput is the typed counterpart of the output map produced by
// LLMComponent.Invoke (plan §2.11.3 row 5). The map keys are:
//
//	"content" string, "model" string, "stopped" bool, "tokens" int
//
// With JSONOutput=true the map additionally carries "json"
// (map[string]any) when the content parses as a JSON object; that key has
// no field here.
type LLMOutput struct {
	// Content is the assistant's reply text.
	Content string
	// Model is the model identifier echoed back.
	Model string
	// Stopped reports whether the model finished.
	Stopped bool
	// Tokens is the reported token count.
	Tokens int
}
|
|
|
|
// ChatInvoker is the abstraction the LLM component uses to talk to a
|
|
// chat model. The default implementation lives in this file; tests can
|
|
// override the package-level defaultChatInvoker to inject a stub.
|
|
type ChatInvoker interface {
|
|
Invoke(ctx context.Context, req ChatInvokeRequest) (*ChatInvokeResponse, error)
|
|
}
|
|
|
|
// ChatInvokeRequest is the minimal surface the LLM component needs to
|
|
// dispatch a chat call. Driver / APIKey / ModelName are kept here so the
|
|
// invoker can wire the right provider without the component caring.
|
|
type ChatInvokeRequest struct {
|
|
Driver string
|
|
ModelName string
|
|
APIKey string
|
|
BaseURL string
|
|
Messages []schema.Message
|
|
Temperature *float64
|
|
MaxTokens *int
|
|
}
|
|
|
|
// ChatInvokeResponse is what a ChatInvoker hands back. LLMComponent.Invoke
// copies each field into the matching key of its output map.
type ChatInvokeResponse struct {
	// Content is the assistant's reply text ("content").
	Content string
	// Model is the model identifier ("model").
	Model string
	// Stopped is written to "stopped". The default invoker always sets it
	// to true.
	Stopped bool
	// Tokens is written to "tokens". The default invoker always reports 0.
	Tokens int
}
|
|
|
|
// defaultChatInvokerMu guards defaultChatInvoker swaps during tests.
|
|
var defaultChatInvokerMu sync.RWMutex
|
|
|
|
// defaultChatInvoker is the production ChatInvoker. Replaced in tests.
|
|
var defaultChatInvoker ChatInvoker = &einoChatInvoker{}
|
|
|
|
// SetDefaultChatInvoker swaps the package-level ChatInvoker (test helper).
|
|
// Pass nil to restore the default. Concurrent-safe.
|
|
func SetDefaultChatInvoker(inv ChatInvoker) {
|
|
defaultChatInvokerMu.Lock()
|
|
defer defaultChatInvokerMu.Unlock()
|
|
defaultChatInvoker = inv
|
|
}
|
|
|
|
// getDefaultChatInvoker returns the current default ChatInvoker.
|
|
func getDefaultChatInvoker() ChatInvoker {
|
|
defaultChatInvokerMu.RLock()
|
|
defer defaultChatInvokerMu.RUnlock()
|
|
if defaultChatInvoker == nil {
|
|
return &einoChatInvoker{}
|
|
}
|
|
return defaultChatInvoker
|
|
}
|
|
|
|
// einoChatInvoker is the built-in ChatInvoker. It is stateless: every Invoke
// call resolves a driver and builds a new models.EinoChatModel from the
// request before dispatching.
type einoChatInvoker struct{}
|
|
|
|
// Invoke satisfies ChatInvoker.
|
|
func (e *einoChatInvoker) Invoke(ctx context.Context, req ChatInvokeRequest) (*ChatInvokeResponse, error) {
|
|
if req.ModelName == "" {
|
|
return nil, fmt.Errorf("component: LLM: model_id is required")
|
|
}
|
|
driver := req.Driver
|
|
if driver == "" {
|
|
driver = "dummy"
|
|
}
|
|
// baseURL: drivers consult map["default"] as the canonical endpoint
|
|
// (see internal/entity/models/base_model.go:GetBaseURL). When the
|
|
// caller did not override, leave the driver default in place by
|
|
// passing nil — every driver seeds its own map at construction time.
|
|
var baseURL map[string]string
|
|
if req.BaseURL != "" {
|
|
baseURL = map[string]string{"default": req.BaseURL}
|
|
}
|
|
// urlSuffix: each driver appends URLSuffix.Chat to baseURL to form
|
|
// the chat-completions endpoint (e.g. "chat/completions" for
|
|
// openai-compatible drivers, "v1/messages" for anthropic). The
|
|
// factory's NewModelDriver accepts a zero URLSuffix and stores it
|
|
// as-is; the openai driver then builds `<base>/` (with no path),
|
|
// which is the wrong endpoint for a v1-root base URL. We seed
|
|
// the right suffix per driver here so the factory and the
|
|
// openai driver's URL construction agree.
|
|
urlSuffix := chatURLSuffixFor(driver)
|
|
d, err := models.NewModelFactory().CreateModelDriver(driver, baseURL, urlSuffix)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("component: LLM: resolve driver %q: %w", driver, err)
|
|
}
|
|
if d == nil {
|
|
return nil, fmt.Errorf("component: LLM: no driver for %q", driver)
|
|
}
|
|
apiKey := req.APIKey
|
|
cfg := &models.APIConfig{ApiKey: &apiKey}
|
|
cm := models.NewChatModel(d, &req.ModelName, cfg)
|
|
|
|
chatCfg := &models.ChatConfig{
|
|
Temperature: req.Temperature,
|
|
MaxTokens: req.MaxTokens,
|
|
}
|
|
wrapper := models.NewEinoChatModel(cm, chatCfg)
|
|
out, err := wrapper.Generate(ctx, toEinoMessages(req.Messages))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &ChatInvokeResponse{
|
|
Content: out.Content,
|
|
Model: req.ModelName,
|
|
Stopped: true,
|
|
Tokens: 0,
|
|
}, nil
|
|
}
|
|
|
|
// toEinoMessages converts the LLM component's Message slice to eino's.
|
|
func toEinoMessages(msgs []schema.Message) []*schema.Message {
|
|
if len(msgs) == 0 {
|
|
return nil
|
|
}
|
|
out := make([]*schema.Message, 0, len(msgs))
|
|
for i := range msgs {
|
|
m := msgs[i]
|
|
role := m.Role
|
|
if role == "" {
|
|
role = schema.User
|
|
}
|
|
out = append(out, &schema.Message{Role: role, Content: m.Content})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// chatURLSuffixFor returns the URLSuffix the factory should pass to
|
|
// the driver for the chat endpoint. Each driver's ChatWithMessages
|
|
// builds `baseURL/URLSuffix.Chat`, so the suffix has to match the
|
|
// provider's actual chat path. We seed the common ones here; for any
|
|
// driver the factory has no entry for, we fall through to a default
|
|
// "chat/completions" path (the openai-compatible default), which
|
|
// matches the dummy driver and any third-party openai-compatible
|
|
// gateway.
|
|
func chatURLSuffixFor(driver string) models.URLSuffix {
|
|
switch strings.ToLower(driver) {
|
|
case "anthropic":
|
|
return models.URLSuffix{Chat: "v1/messages"}
|
|
default:
|
|
return models.URLSuffix{Chat: "chat/completions"}
|
|
}
|
|
}
|
|
|
|
// NewLLMComponent builds an LLMComponent from raw params.
|
|
func NewLLMComponent(p LLMParam) *LLMComponent {
|
|
return &LLMComponent{param: p}
|
|
}
|
|
|
|
// Name returns the registered component name.
|
|
func (c *LLMComponent) Name() string { return "LLM" }
|
|
|
|
// Invoke runs the LLM and returns the output map.
|
|
func (c *LLMComponent) Invoke(ctx context.Context, inputs map[string]any) (map[string]any, error) {
|
|
p := mergeLLMParam(c.param, inputs)
|
|
if p.ModelID == "" {
|
|
return nil, &ParamError{Field: "model_id", Reason: "required"}
|
|
}
|
|
if p.UserPrompt == "" && p.SystemPrompt == "" {
|
|
return nil, &ParamError{Field: "user_prompt", Reason: "at least one of user_prompt or system_prompt must be set"}
|
|
}
|
|
// The Anthropic driver (and the openai chat-completions driver
|
|
// when the system role is dropped) reject a system-only message
|
|
// list with "messages is empty" / 400. v1 fixtures frequently
|
|
// ship only a system prompt; fall back to using the system text
|
|
// as the user message so the call still goes through. The
|
|
// answer text in that case is the model continuing the
|
|
// instruction in its reply slot, which is what the v1 fixtures
|
|
// also expect.
|
|
if p.UserPrompt == "" {
|
|
p.UserPrompt = p.SystemPrompt
|
|
}
|
|
|
|
msgs := buildMessages(p.SystemPrompt, p.UserPrompt)
|
|
inv := getDefaultChatInvoker()
|
|
resp, err := inv.Invoke(ctx, ChatInvokeRequest{
|
|
Driver: p.Driver,
|
|
ModelName: p.ModelID,
|
|
APIKey: p.APIKey,
|
|
BaseURL: p.BaseURL,
|
|
Messages: msgs,
|
|
Temperature: p.Temperature,
|
|
MaxTokens: p.MaxTokens,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("component: LLM.Invoke: %w", err)
|
|
}
|
|
|
|
out := map[string]any{
|
|
"content": resp.Content,
|
|
"model": resp.Model,
|
|
"stopped": resp.Stopped,
|
|
"tokens": resp.Tokens,
|
|
}
|
|
if p.JSONOutput {
|
|
var parsed map[string]any
|
|
if err := json.Unmarshal([]byte(resp.Content), &parsed); err == nil {
|
|
out["json"] = parsed
|
|
} else {
|
|
// Surface a non-fatal warning — caller can still read "content".
|
|
log.Printf("component: LLM: json_output=true but content is not valid JSON: %v", err)
|
|
}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// Stream implements Component.Stream. It forwards incremental chunks via
|
|
// the returned channel. When the model finishes, the channel is closed.
|
|
func (c *LLMComponent) Stream(ctx context.Context, inputs map[string]any) (<-chan map[string]any, error) {
|
|
out := make(chan map[string]any, 1)
|
|
go func() {
|
|
defer close(out)
|
|
result, err := c.Invoke(ctx, inputs)
|
|
if err != nil {
|
|
out <- map[string]any{"error": err.Error()}
|
|
return
|
|
}
|
|
out <- result
|
|
}()
|
|
return out, nil
|
|
}
|
|
|
|
// Inputs returns parameter metadata for tooling.
|
|
func (c *LLMComponent) Inputs() map[string]string {
|
|
return map[string]string{
|
|
"model_id": "Provider-side model identifier (e.g. \"gpt-4o-mini\")",
|
|
"system_prompt": "Optional system prompt prepended to the conversation",
|
|
"user_prompt": "User prompt; supports {{cpn_id@param}} references resolved by the canvas engine",
|
|
"temperature": "Sampling temperature (0.0-2.0). Optional.",
|
|
"max_tokens": "Maximum tokens to generate. Optional.",
|
|
"json_output": "If true, attempt to JSON-parse \"content\" into \"json\" output key.",
|
|
"driver": "Provider driver name (openai, anthropic, …). Defaults to \"dummy\".",
|
|
"api_key": "Override API key for this call. Empty defers to env.",
|
|
}
|
|
}
|
|
|
|
// Outputs returns output metadata.
|
|
func (c *LLMComponent) Outputs() map[string]string {
|
|
return map[string]string{
|
|
"content": "Assistant text response",
|
|
"model": "Model identifier echoed back (sanity check)",
|
|
"stopped": "True if the model finished naturally",
|
|
"tokens": "Reported token count (0 when not reported by the driver)",
|
|
"json": "When json_output=true and content parses as a JSON object, the parsed map",
|
|
}
|
|
}
|
|
|
|
// buildMessages assembles a system + user message sequence. Order:
|
|
// system first (if set), then user.
|
|
func buildMessages(system, user string) []schema.Message {
|
|
out := make([]schema.Message, 0, 2)
|
|
if system != "" {
|
|
out = append(out, schema.Message{Role: schema.System, Content: system})
|
|
}
|
|
if user != "" {
|
|
out = append(out, schema.Message{Role: schema.User, Content: user})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// mergeLLMParam layers raw inputs over the receiver's default param set.
|
|
//
|
|
// v1 DSL aliases accepted alongside the v2 names:
|
|
//
|
|
// "llm_id" → "model_id"
|
|
// "sys_prompt" → "system_prompt"
|
|
// "base_url" → "BaseURL"
|
|
//
|
|
// The v1 fixtures in internal/agent/dsl/testdata/v1_examples use the
|
|
// short forms; without these aliases the v1→v2 conversion (plan §2.5)
|
|
// would have to be run before the factory builds the component, which
|
|
// the e2e compile+invoke path doesn't do.
|
|
func mergeLLMParam(base LLMParam, inputs map[string]any) LLMParam {
|
|
p := base
|
|
if v, ok := stringFrom(inputs, "model_id"); ok {
|
|
p.ModelID = v
|
|
} else if v, ok := stringFrom(inputs, "llm_id"); ok {
|
|
p.ModelID = v
|
|
}
|
|
if v, ok := stringFrom(inputs, "system_prompt"); ok {
|
|
p.SystemPrompt = v
|
|
} else if v, ok := stringFrom(inputs, "sys_prompt"); ok {
|
|
p.SystemPrompt = v
|
|
}
|
|
if v, ok := stringFrom(inputs, "user_prompt"); ok {
|
|
p.UserPrompt = v
|
|
}
|
|
if v, ok := boolFrom(inputs, "json_output"); ok {
|
|
p.JSONOutput = v
|
|
}
|
|
if v, ok := stringFrom(inputs, "driver"); ok {
|
|
p.Driver = v
|
|
}
|
|
if v, ok := stringFrom(inputs, "api_key"); ok {
|
|
p.APIKey = v
|
|
}
|
|
if v, ok := stringFrom(inputs, "base_url"); ok {
|
|
p.BaseURL = v
|
|
}
|
|
if v, ok := floatFrom(inputs, "temperature"); ok {
|
|
f := v
|
|
p.Temperature = &f
|
|
}
|
|
if v, ok := intFrom(inputs, "max_tokens"); ok {
|
|
i := v
|
|
p.MaxTokens = &i
|
|
}
|
|
return p
|
|
}
|
|
|
|
// stringFrom extracts a string from inputs[name]. It accepts a plain string
// as well as any value implementing fmt.Stringer, in which case the result
// of String() is returned. The second result is false when the key is
// missing or the value is of any other type (including a nil value).
//
// A Stringer's String method is called directly, so a typed-nil pointer
// whose String dereferences its receiver will panic as it would anywhere
// else.
func stringFrom(inputs map[string]any, name string) (string, bool) {
	// Indexing a nil or missing key yields a nil interface, which matches
	// neither case below.
	switch v := inputs[name].(type) {
	case string:
		return v, true
	case fmt.Stringer:
		return v.String(), true
	}
	return "", false
}
|
|
|
|
// boolFrom extracts a bool from inputs[name]. The second result is true
// only when the key is present and its value is a bool; otherwise both
// results are false.
func boolFrom(inputs map[string]any, name string) (bool, bool) {
	// A missing key yields a nil interface, for which the comma-ok
	// assertion reports (false, false) — the same as a non-bool value.
	flag, isBool := inputs[name].(bool)
	return flag, isBool
}
|
|
|
|
// floatFrom extracts a float64 from inputs[name]. Besides float64 it
// accepts float32, int and int64, converting each to float64. The second
// result is false when the key is missing or holds any other type.
func floatFrom(inputs map[string]any, name string) (float64, bool) {
	raw := inputs[name] // nil when the key is absent; matches nothing below
	if f, ok := raw.(float64); ok {
		return f, true
	}
	if f, ok := raw.(float32); ok {
		return float64(f), true
	}
	if n, ok := raw.(int); ok {
		return float64(n), true
	}
	if n, ok := raw.(int64); ok {
		return float64(n), true
	}
	return 0, false
}
|
|
|
|
// intFrom extracts an int from inputs[name]. It accepts int, int64 and
// float64. A float64 is truncated toward zero. The second result is false
// when the key is missing, the value is of another type, or the value
// cannot be represented as an int: NaN, ±Inf, a float64 outside the int
// range, or an int64 that overflows a 32-bit int.
func intFrom(inputs map[string]any, name string) (int, bool) {
	const (
		maxInt = int(^uint(0) >> 1)
		minInt = -maxInt - 1
	)
	switch x := inputs[name].(type) {
	case int:
		return x, true
	case int64:
		// The round trip only fails where int is narrower than int64.
		if int64(int(x)) != x {
			return 0, false
		}
		return int(x), true
	case float64:
		// Converting an unrepresentable float to int is implementation-
		// defined in Go, so reject it up front. float64(minInt) and its
		// negation are exact powers of two; the upper bound is exclusive.
		// The condition is written positively so NaN, for which every
		// comparison is false, is rejected too.
		if !(x >= float64(minInt) && x < -float64(minInt)) {
			return 0, false
		}
		return int(x), true
	}
	return 0, false
}
|
|
|
|
// init registers LLMComponent with the orchestrator-owned registry.
|
|
func init() {
|
|
Register("LLM", func(params map[string]any) (Component, error) {
|
|
var p LLMParam
|
|
if v, ok := stringFrom(params, "model_id"); ok {
|
|
p.ModelID = v
|
|
} else if v, ok := stringFrom(params, "llm_id"); ok {
|
|
p.ModelID = v
|
|
}
|
|
if v, ok := stringFrom(params, "system_prompt"); ok {
|
|
p.SystemPrompt = v
|
|
} else if v, ok := stringFrom(params, "sys_prompt"); ok {
|
|
p.SystemPrompt = v
|
|
}
|
|
if v, ok := stringFrom(params, "user_prompt"); ok {
|
|
p.UserPrompt = v
|
|
}
|
|
if v, ok := floatFrom(params, "temperature"); ok {
|
|
f := v
|
|
p.Temperature = &f
|
|
}
|
|
if v, ok := intFrom(params, "max_tokens"); ok {
|
|
i := v
|
|
p.MaxTokens = &i
|
|
}
|
|
if v, ok := boolFrom(params, "json_output"); ok {
|
|
p.JSONOutput = v
|
|
}
|
|
if v, ok := stringFrom(params, "driver"); ok {
|
|
p.Driver = v
|
|
}
|
|
if v, ok := stringFrom(params, "api_key"); ok {
|
|
p.APIKey = v
|
|
}
|
|
if v, ok := stringFrom(params, "base_url"); ok {
|
|
p.BaseURL = v
|
|
}
|
|
return NewLLMComponent(p), nil
|
|
})
|
|
}
|