mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 09:11:59 +08:00
feat(agent): Go port — canvas engine, 22 components, DSL v2, 13 endpoints (#15952)
Ports the agent canvas subsystem from Python to Go.
## What's included
### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages
### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |
### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7
### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)
### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs
### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
This commit is contained in:
305
internal/agent/component/string_transform.go
Normal file
305
internal/agent/component/string_transform.go
Normal file
@@ -0,0 +1,305 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
// Package component — StringTransform (T3, plan §2.11.3 row 18).
|
||||
//
|
||||
// StringTransform has two modes:
|
||||
//
|
||||
// split — break a string on one or more literal delimiters
|
||||
// merge — substitute {{name}} placeholders in a script with values
|
||||
// pulled from the inputs map or the canvas state
|
||||
//
|
||||
// Mirrors agent/component/string_transform.py. The P1 port supports the
|
||||
// common {{name}} placeholder shape only; the full Jinja2 surface
|
||||
// (`{% if %}`, `{% for %}`) is deferred to a later phase per the plan.
|
||||
package component
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"ragflow/internal/agent/runtime"
|
||||
)
|
||||
|
||||
// componentNameStringTransform is the name under which this component
// is registered and which Name() reports.
const componentNameStringTransform = "StringTransform"
|
||||
|
||||
// stringTransformParam is the static configuration of a StringTransform
// node, decoded from the DSL param map by Update and validated by Check.
type stringTransformParam struct {
	Method     string   `json:"method"`     // "split" or "merge"
	Script     string   `json:"script"`     // merge mode: template
	SplitRef   string   `json:"split_ref"`  // split mode: state ref to read
	Delimiters []string `json:"delimiters"` // split mode: literal delimiters
}

// Update decodes conf into the receiver.
//
// method, script and split_ref are overwritten on every call: a missing
// or non-string value yields "", and an empty method falls back to
// "split". delimiters is handled by shape:
//
//   - []any:    the string elements are kept, everything else is dropped
//   - []string: copied
//   - absent:   the current value is left untouched
//   - other:    cleared, so that Check rejects the configuration
//
// The stored delimiter slice is always freshly allocated. It never
// shares a backing array with the caller's slice or with a slice this
// receiver held before the call, so values handed out earlier (for
// example through AsDict or a struct copy) are not mutated by a later
// Update. A nil conf is treated as an empty map. Update currently never
// returns a non-nil error.
func (p *stringTransformParam) Update(conf map[string]any) error {
	if conf == nil {
		conf = map[string]any{}
	}

	// Failed type assertions deliberately leave the zero value.
	p.Method, _ = conf["method"].(string)
	if p.Method == "" {
		p.Method = "split"
	}
	p.Script, _ = conf["script"].(string)
	p.SplitRef, _ = conf["split_ref"].(string)

	switch v := conf["delimiters"].(type) {
	case []any:
		out := make([]string, 0, len(v))
		for _, item := range v {
			if s, ok := item.(string); ok {
				out = append(out, s)
			}
		}
		p.Delimiters = out
	case []string:
		// Copy into a new backing array instead of reusing
		// p.Delimiters[:0]; reusing it would overwrite slices that
		// still alias the previous value.
		p.Delimiters = append([]string(nil), v...)
	case nil:
		// key absent (or explicit nil): leave unchanged
	default:
		// unknown shape — treat as empty; Check() will reject
		p.Delimiters = nil
	}
	return nil
}
|
||||
|
||||
// Check validates the param.
|
||||
func (p *stringTransformParam) Check() error {
|
||||
switch p.Method {
|
||||
case "split", "merge":
|
||||
// ok
|
||||
default:
|
||||
return &ParamError{Field: "method", Reason: "must be one of: split, merge"}
|
||||
}
|
||||
if len(p.Delimiters) == 0 {
|
||||
return &ParamError{Field: "delimiters", Reason: "must not be empty"}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AsDict returns the params as a plain map.
|
||||
func (p *stringTransformParam) AsDict() map[string]any {
|
||||
return map[string]any{
|
||||
"method": p.Method,
|
||||
"script": p.Script,
|
||||
"split_ref": p.SplitRef,
|
||||
"delimiters": p.Delimiters,
|
||||
}
|
||||
}
|
||||
|
||||
// placeholderPattern recognises merge placeholders of the form {{name}}.
// The name must look like an identifier (a letter or underscore followed
// by letters, digits or underscores) and is exposed as capture group 1;
// whitespace between the braces and the name is tolerated. Dotted or
// '@'-qualified references such as sys.x, env.x or cpn_id@param do not
// match on purpose: merge placeholders are looked up by plain key in
// the inputs map and/or canvas state.
var placeholderPattern = regexp.MustCompile(`\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}`)
|
||||
|
||||
// StringTransformComponent implements the split/merge component.
|
||||
type StringTransformComponent struct {
|
||||
name string
|
||||
param stringTransformParam
|
||||
}
|
||||
|
||||
// NewStringTransformComponent constructs a StringTransform from the
|
||||
// DSL param map.
|
||||
func NewStringTransformComponent(params map[string]any) (Component, error) {
|
||||
p := &stringTransformParam{}
|
||||
if err := p.Update(params); err != nil {
|
||||
return nil, fmt.Errorf("StringTransform: param update: %w", err)
|
||||
}
|
||||
if err := p.Check(); err != nil {
|
||||
return nil, fmt.Errorf("StringTransform: param check: %w", err)
|
||||
}
|
||||
return &StringTransformComponent{
|
||||
name: componentNameStringTransform,
|
||||
param: *p,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Name returns the registered component name.
|
||||
func (s *StringTransformComponent) Name() string { return s.name }
|
||||
|
||||
// Invoke runs the configured method (split or merge) and returns
|
||||
// outputs["result"] with the transformed payload.
|
||||
func (s *StringTransformComponent) Invoke(ctx context.Context, inputs map[string]any) (map[string]any, error) {
|
||||
state, _, err := runtime.GetStateFromContext[*runtime.CanvasState](ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("StringTransform: %w", err)
|
||||
}
|
||||
if state == nil {
|
||||
return nil, fmt.Errorf("StringTransform: nil canvas state")
|
||||
}
|
||||
|
||||
if s.param.Method == "split" {
|
||||
return s.doSplit(ctx, state, inputs)
|
||||
}
|
||||
return s.doMerge(ctx, state, inputs), nil
|
||||
}
|
||||
|
||||
// Stream mirrors Invoke; StringTransform is a single-shot transform.
|
||||
func (s *StringTransformComponent) Stream(ctx context.Context, inputs map[string]any) (<-chan map[string]any, error) {
|
||||
out, err := s.Invoke(ctx, inputs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ch := make(chan map[string]any, 1)
|
||||
ch <- out
|
||||
close(ch)
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// Inputs returns the parameter surface. The shape depends on the
|
||||
// configured method.
|
||||
func (s *StringTransformComponent) Inputs() map[string]string {
|
||||
if s.param.Method == "split" {
|
||||
return map[string]string{
|
||||
"line": "Optional direct string to split; if absent, the component reads state[split_ref].",
|
||||
}
|
||||
}
|
||||
// merge: placeholders derived from the script
|
||||
names := extractPlaceholders(s.param.Script)
|
||||
out := make(map[string]string, len(names))
|
||||
for _, n := range names {
|
||||
out[n] = "Value to substitute for {{" + n + "}} (drawn from inputs or state)."
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Outputs returns the transformed payload.
|
||||
func (s *StringTransformComponent) Outputs() map[string]string {
|
||||
return map[string]string{
|
||||
"result": "Split: a []string of kept tokens. Merge: a single string with placeholders resolved.",
|
||||
}
|
||||
}
|
||||
|
||||
// doSplit runs the split method. Mirrors the Python _split helper
|
||||
// (string_transform.py:76-91): build a regex of the literal
|
||||
// delimiters, split with capture groups, keep the even-indexed
|
||||
// (non-delimiter) tokens.
|
||||
func (s *StringTransformComponent) doSplit(_ context.Context, state *runtime.CanvasState, inputs map[string]any) (map[string]any, error) {
|
||||
var varValue string
|
||||
if line, ok := inputs["line"].(string); ok && line != "" {
|
||||
varValue = line
|
||||
} else if s.param.SplitRef != "" {
|
||||
v, err := state.GetVar(s.param.SplitRef)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("StringTransform: split_ref %q: %w", s.param.SplitRef, err)
|
||||
}
|
||||
if v == nil {
|
||||
varValue = ""
|
||||
} else if s, ok := v.(string); ok {
|
||||
varValue = s
|
||||
} else {
|
||||
return nil, fmt.Errorf("StringTransform: split input is not a string: %T", v)
|
||||
}
|
||||
}
|
||||
|
||||
// Build the regex: |.join([regexp.QuoteMeta(d) for d in delimiters])
|
||||
parts := make([]string, 0, len(s.param.Delimiters))
|
||||
for _, d := range s.param.Delimiters {
|
||||
parts = append(parts, regexp.QuoteMeta(d))
|
||||
}
|
||||
pattern := "(?s)(" + strings.Join(parts, "|") + ")"
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("StringTransform: bad delimiter pattern: %w", err)
|
||||
}
|
||||
matches := re.FindAllStringIndex(varValue, -1)
|
||||
|
||||
// Walk the input string, collecting the content between delimiter
|
||||
// matches. This mirrors Python's re.split with a capture group
|
||||
// (which interleaves content and delimiter tokens) followed by
|
||||
// dropping the odd-indexed (delimiter) tokens. When there are no
|
||||
// matches, the whole input is a single content token.
|
||||
kept := make([]string, 0, len(matches)+1)
|
||||
prevEnd := 0
|
||||
for _, m := range matches {
|
||||
kept = append(kept, varValue[prevEnd:m[0]])
|
||||
prevEnd = m[1]
|
||||
}
|
||||
kept = append(kept, varValue[prevEnd:])
|
||||
return map[string]any{"result": kept}, nil
|
||||
}
|
||||
|
||||
// doMerge runs the merge method. Mirrors the Python _merge helper
|
||||
// (string_transform.py:93-112): collect {{name}} placeholders, resolve
|
||||
// each from inputs (preferred) or canvas state, substitute, and emit
|
||||
// the resolved script.
|
||||
func (s *StringTransformComponent) doMerge(_ context.Context, state *runtime.CanvasState, inputs map[string]any) map[string]any {
|
||||
script := s.param.Script
|
||||
|
||||
// First pass: state-level template resolution for any {{ref}} that
|
||||
// is a valid cpn_id@param / sys.x / env.x reference. The Python
|
||||
// _is_jinjia2 + template.render path is more general; for P1 we
|
||||
// only support the simple state-resolvable form.
|
||||
if strings.Contains(script, "{{") {
|
||||
if resolved, err := runtime.ResolveTemplate(script, state); err == nil {
|
||||
script = resolved
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: {{name}} placeholders → values from inputs, then state.
|
||||
names := extractPlaceholders(script)
|
||||
if len(names) == 0 {
|
||||
return map[string]any{"result": script}
|
||||
}
|
||||
for _, n := range names {
|
||||
placeholder := "{{" + n + "}}"
|
||||
var value any
|
||||
if v, ok := inputs[n]; ok {
|
||||
value = v
|
||||
} else if v, err := state.GetVar(n); err == nil && v != nil {
|
||||
value = v
|
||||
} else {
|
||||
value = ""
|
||||
}
|
||||
script = strings.ReplaceAll(script, placeholder, fmt.Sprintf("%v", value))
|
||||
}
|
||||
return map[string]any{"result": script}
|
||||
}
|
||||
|
||||
// extractPlaceholders returns the unique placeholder names appearing
|
||||
// in s, in first-occurrence order.
|
||||
func extractPlaceholders(s string) []string {
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
matches := placeholderPattern.FindAllStringSubmatch(s, -1)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
seen := make(map[string]struct{}, len(matches))
|
||||
out := make([]string, 0, len(matches))
|
||||
for _, m := range matches {
|
||||
if len(m) < 2 {
|
||||
continue
|
||||
}
|
||||
name := m[1]
|
||||
if _, dup := seen[name]; dup {
|
||||
continue
|
||||
}
|
||||
seen[name] = struct{}{}
|
||||
out = append(out, name)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func init() {
|
||||
Register(componentNameStringTransform, NewStringTransformComponent)
|
||||
}
|
||||
Reference in New Issue
Block a user