mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Ports the agent canvas subsystem from Python to Go.
## What's included
### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages
### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin, Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform, ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |
### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7
### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)
### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs
### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
232 lines
7.6 KiB
Go
232 lines
7.6 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
// Package component — Invoke component (T3, plan §2.11.3 row 14, §2.7).
|
|
//
|
|
// Invoke is the canvas HTTP client node. It supports GET/POST/PUT/DELETE
|
|
// with custom headers, optional proxy, and per-request timeout, and
|
|
// wraps the underlying net/http.Transport with
|
|
// go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp.NewTransport
|
|
// so outbound calls automatically propagate W3C traceparent headers
|
|
// (plan §2.10 — OTel integration).
|
|
//
|
|
// The P0 implementation does NOT include HTML cleaning, JSON form-data
|
|
// building, or retry/backoff. Those land in Phase 2 P3 (per plan §2.7)
|
|
// when deepdoc HTTP use cases first need them.
|
|
package component
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
|
|
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
|
)
|
|
|
|
// Names, defaults, and limits used by the Invoke component.
const (
	// componentNameInvoke is the name the component is registered and
	// constructed under.
	componentNameInvoke = "Invoke"

	// defaultInvokeTimeout is used when the inputs carry no positive
	// "timeout" value.
	defaultInvokeTimeout = 30 * time.Second

	// defaultInvokeUserAgent is set as the User-Agent header before any
	// caller-supplied headers are applied.
	defaultInvokeUserAgent = "ragflow-agent/1.0 (Invoke component)"

	// defaultInvokeContentCT is the Content-Type used for POST/PUT when
	// the inputs do not provide one.
	defaultInvokeContentCT = "application/json"

	// maxInvokeResponseBody is the hard cap (16 MiB) on how many response
	// body bytes are read into memory, to avoid OOM.
	maxInvokeResponseBody = 16 * 1024 * 1024
)
|
|
|
|
// InvokeComponent is the canvas HTTP client node. It carries only its
// registered name and keeps no state between invocations.
type InvokeComponent struct {
	// name is the value reported by Name.
	name string
}
|
|
|
|
// NewInvokeComponent constructs an Invoke component.
|
|
func NewInvokeComponent(_ map[string]any) (Component, error) {
|
|
return &InvokeComponent{name: componentNameInvoke}, nil
|
|
}
|
|
|
|
// Name returns the registered component name.
|
|
func (i *InvokeComponent) Name() string { return i.name }
|
|
|
|
// Invoke executes a single HTTP request and returns the status code,
|
|
// body, and response headers. See Inputs() for the param contract.
|
|
func (i *InvokeComponent) Invoke(ctx context.Context, inputs map[string]any) (map[string]any, error) {
|
|
method, _ := inputs["method"].(string)
|
|
method = strings.ToUpper(strings.TrimSpace(method))
|
|
switch method {
|
|
case http.MethodGet, http.MethodPost, http.MethodPut, http.MethodDelete:
|
|
default:
|
|
return nil, fmt.Errorf("Invoke: invalid method %q (want GET/POST/PUT/DELETE)", method)
|
|
}
|
|
rawURL, _ := inputs["url"].(string)
|
|
if rawURL == "" {
|
|
return nil, errors.New("Invoke: url is required")
|
|
}
|
|
// url.Parse is a sanity check; we trust the orchestrator to have
|
|
// already resolved any {{...}} refs, but a bad string here is a
|
|
// programmer error worth surfacing.
|
|
if _, err := url.Parse(rawURL); err != nil {
|
|
return nil, fmt.Errorf("Invoke: parse url: %w", err)
|
|
}
|
|
|
|
timeout := defaultInvokeTimeout
|
|
if v, ok := inputs["timeout"].(int); ok && v > 0 {
|
|
timeout = time.Duration(v) * time.Second
|
|
} else if v, ok := inputs["timeout"].(float64); ok && v > 0 {
|
|
timeout = time.Duration(v) * time.Second
|
|
}
|
|
|
|
contentType, _ := inputs["content_type"].(string)
|
|
if contentType == "" && (method == http.MethodPost || method == http.MethodPut) {
|
|
contentType = defaultInvokeContentCT
|
|
}
|
|
|
|
var body io.Reader
|
|
if s, ok := inputs["body"].(string); ok && s != "" {
|
|
body = bytes.NewReader([]byte(s))
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, method, rawURL, body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Invoke: build request: %w", err)
|
|
}
|
|
if contentType != "" {
|
|
req.Header.Set("Content-Type", contentType)
|
|
}
|
|
req.Header.Set("User-Agent", defaultInvokeUserAgent)
|
|
if h, ok := inputs["headers"].(map[string]any); ok {
|
|
for k, v := range h {
|
|
if s, ok := v.(string); ok {
|
|
req.Header.Set(k, s)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Wrap the stdlib Transport with otelhttp so the request gets a
|
|
// child span + W3C traceparent injected automatically.
|
|
transport := otelhttp.NewTransport(http.DefaultTransport)
|
|
// Optional proxy support: if inputs["proxy"] is set, build a
|
|
// dedicated Transport that uses it. This avoids mutating the
|
|
// global http.DefaultTransport (which would also affect unrelated
|
|
// components in the same process).
|
|
if proxyStr, ok := inputs["proxy"].(string); ok && proxyStr != "" {
|
|
transport = otelhttp.NewTransport(&http.Transport{
|
|
Proxy: http.ProxyURL(mustParseProxy(proxyStr)),
|
|
})
|
|
}
|
|
|
|
client := &http.Client{
|
|
Timeout: timeout,
|
|
Transport: transport,
|
|
}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Invoke: do: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Cap the response body to keep a hostile server from streaming
|
|
// infinite bytes into memory.
|
|
limited := io.LimitReader(resp.Body, maxInvokeResponseBody)
|
|
bodyBytes, err := io.ReadAll(limited)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Invoke: read body: %w", err)
|
|
}
|
|
|
|
hdr := make(map[string]string, len(resp.Header))
|
|
for k, vs := range resp.Header {
|
|
// First value only — multi-value headers are uncommon in
|
|
// canvas-DSL HTTP responses, and the param contract specifies
|
|
// a string map.
|
|
if len(vs) > 0 {
|
|
hdr[k] = vs[0]
|
|
}
|
|
}
|
|
|
|
return map[string]any{
|
|
"status": resp.StatusCode,
|
|
"body": string(bodyBytes),
|
|
"headers": hdr,
|
|
}, nil
|
|
}
|
|
|
|
// Stream is a synchronous facade over Invoke for P0. Real streaming
|
|
// (chunked transfer as it arrives) is deferred to Phase 2 P3.
|
|
func (i *InvokeComponent) Stream(ctx context.Context, inputs map[string]any) (<-chan map[string]any, error) {
|
|
out, err := i.Invoke(ctx, inputs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ch := make(chan map[string]any, 1)
|
|
ch <- out
|
|
close(ch)
|
|
return ch, nil
|
|
}
|
|
|
|
// Inputs returns the public parameter surface.
|
|
func (i *InvokeComponent) Inputs() map[string]string {
|
|
return map[string]string{
|
|
"method": "HTTP method: GET, POST, PUT, or DELETE (case-insensitive).",
|
|
"url": "Target URL; can be a {{...}} reference resolved upstream.",
|
|
"headers": "Optional map of string headers.",
|
|
"body": "Optional request body (string).",
|
|
"timeout": "Per-request timeout in seconds; default 30.",
|
|
"proxy": "Optional proxy URL (e.g. http://host:3128).",
|
|
"content_type": "Optional Content-Type; default 'application/json' for POST/PUT.",
|
|
}
|
|
}
|
|
|
|
// Outputs returns the response surface.
|
|
func (i *InvokeComponent) Outputs() map[string]string {
|
|
return map[string]string{
|
|
"status": "HTTP status code (int).",
|
|
"body": "Response body (string, truncated at 16 MiB).",
|
|
"headers": "Response headers (first value per key).",
|
|
}
|
|
}
|
|
|
|
// mustParseProxy parses raw as a proxy URL and returns it. It panics when
// raw cannot be parsed or when the parsed URL has an empty Host, following
// the package's panic-on-bad-input convention for must-style helpers: a
// malformed proxy string is treated as a deployment error.
func mustParseProxy(raw string) *url.URL {
	parsed, parseErr := url.Parse(raw)
	switch {
	case parseErr != nil:
		panic(fmt.Sprintf("Invoke: invalid proxy URL %q: %v", raw, parseErr))
	case parsed.Host == "":
		// A host-less proxy URL is rejected up front with a clear message.
		panic(fmt.Sprintf("Invoke: proxy URL %q has no host", raw))
	}
	return parsed
}
|
|
|
|
// Blank reference that keeps the "net" import in use: nothing else in the
// visible part of this file refers to the net package directly, and Go
// rejects unused imports at compile time. Slated for removal in Phase 2 P3.
var _ = net.IPv4len
|
|
|
|
func init() {
|
|
Register(componentNameInvoke, NewInvokeComponent)
|
|
}
|