mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Ports the agent canvas subsystem from Python to Go.
## What's included
### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages
### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |
### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7
### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)
### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs
### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
274 lines
8.9 KiB
Go
274 lines
8.9 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
// Package component — Browser (T3, plan §2.11.3 row 15).
|
|
//
|
|
// Browser visits a URL, fetches the HTML body, and (optionally) asks an
|
|
// LLM to summarize the page. The P4 implementation focuses on the fetch
|
|
// half: it returns the body as a string with size metadata. The LLM-
|
|
// summary path is a no-op passthrough when model_id is unset, with the
|
|
// wiring left in place for Phase 5 (when the model's ChatInvoker is
|
|
// available without duplicating the LLM component's internals here).
|
|
//
|
|
// Storage upload of downloaded artifacts is deferred to Phase 5 per
|
|
// the plan. Until then Invoke returns the fetched body inline as
// `content` (capped at 16 MiB) together with its byte size.
|
|
//
|
|
// The transport wraps net/http with otelhttp.NewTransport so the
|
|
// outbound request participates in the active OTel trace (plan §2.10).
|
|
package component
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
|
|
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
|
|
|
"ragflow/internal/agent/runtime"
|
|
)
|
|
|
|
// componentNameBrowser is the name the Browser node is registered under.
const componentNameBrowser = "Browser"

// Limits applied to each outbound Browser fetch.
const (
	// defaultBrowserTimeout is used when the node's timeout param is not
	// a positive number of seconds.
	defaultBrowserTimeout = 30 * time.Second

	// maxBrowserResponseBody is the most body bytes read from a response
	// (16 MiB; same cap as Invoke).
	maxBrowserResponseBody = 16 * 1024 * 1024
)
|
|
|
|
// browserParam holds the static, per-node configuration of a Browser
// component as decoded from the DSL param map.
type browserParam struct {
	// ModelID optionally names the LLM used to summarize the page.
	ModelID string `json:"model_id"`
	// URL is the default target when no input overrides it.
	URL string `json:"url"`
	// Prompt is the optional summarization prompt.
	Prompt string `json:"prompt"`
	// Timeout is the per-request timeout, in seconds.
	Timeout int `json:"timeout"`
}
|
|
|
|
// Update copies a fresh param map into the receiver.
|
|
func (p *browserParam) Update(conf map[string]any) error {
|
|
if conf == nil {
|
|
conf = map[string]any{}
|
|
}
|
|
p.ModelID, _ = conf["model_id"].(string)
|
|
p.URL, _ = conf["url"].(string)
|
|
p.Prompt, _ = conf["prompt"].(string)
|
|
// Preserve an explicitly-supplied timeout (including 0 / negative)
|
|
// so Check() can reject bad values. Only reset to zero when the
|
|
// caller omitted the field entirely.
|
|
if v, ok := intFrom(conf, "timeout"); ok {
|
|
p.Timeout = v
|
|
} else {
|
|
p.Timeout = 0
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Check validates the param. URL is optional at construction time —
|
|
// the resolved URL (param or input override) is checked at Invoke time
|
|
// so test fixtures can construct the component without a real URL.
|
|
func (p *browserParam) Check() error {
|
|
if p.Timeout < 0 {
|
|
return &ParamError{Field: "timeout", Reason: "must be non-negative"}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// AsDict returns the params as a plain map.
|
|
func (p *browserParam) AsDict() map[string]any {
|
|
return map[string]any{
|
|
"model_id": p.ModelID,
|
|
"url": p.URL,
|
|
"prompt": p.Prompt,
|
|
"timeout": p.Timeout,
|
|
}
|
|
}
|
|
|
|
// BrowserComponent implements the Browser canvas node.
|
|
type BrowserComponent struct {
|
|
name string
|
|
param browserParam
|
|
}
|
|
|
|
// NewBrowserComponent constructs a Browser from the DSL param map.
|
|
func NewBrowserComponent(params map[string]any) (Component, error) {
|
|
p := &browserParam{}
|
|
if err := p.Update(params); err != nil {
|
|
return nil, fmt.Errorf("Browser: param update: %w", err)
|
|
}
|
|
if err := p.Check(); err != nil {
|
|
return nil, fmt.Errorf("Browser: param check: %w", err)
|
|
}
|
|
return &BrowserComponent{
|
|
name: componentNameBrowser,
|
|
param: *p,
|
|
}, nil
|
|
}
|
|
|
|
// Name returns the registered component name.
|
|
func (b *BrowserComponent) Name() string { return b.name }
|
|
|
|
// Invoke visits the (resolved) URL, returns the response body as
|
|
// content, the final URL after any redirects, the HTTP status, and the
|
|
// bytes' size. When model_id is set in the param and a prompt is
|
|
// provided, the LLM summarization hook is left for Phase 5; for P4 the
|
|
// content field simply contains the fetched body.
|
|
func (b *BrowserComponent) Invoke(ctx context.Context, inputs map[string]any) (map[string]any, error) {
|
|
state, _, err := runtime.GetStateFromContext[*runtime.CanvasState](ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Browser: %w", err)
|
|
}
|
|
if state == nil {
|
|
return nil, errors.New("Browser: nil canvas state")
|
|
}
|
|
|
|
// Resolve URL: input override → state(file_ref) → param default.
|
|
rawURL := b.param.URL
|
|
if v, ok := inputs["url"].(string); ok && strings.TrimSpace(v) != "" {
|
|
rawURL = v
|
|
} else if ref, ok := inputs["file_ref"].(string); ok && ref != "" {
|
|
// file_ref points at a stored path/url; for P4 we just echo it
|
|
// back as the target URL (Phase 5 will resolve to a MinIO path).
|
|
if v, err := state.GetVar(ref); err == nil && v != nil {
|
|
if s, ok := v.(string); ok && s != "" {
|
|
rawURL = s
|
|
}
|
|
}
|
|
}
|
|
if strings.TrimSpace(rawURL) == "" {
|
|
return nil, &ParamError{Field: "url", Reason: "required (param or inputs.url)"}
|
|
}
|
|
if _, err := url.Parse(rawURL); err != nil {
|
|
return nil, fmt.Errorf("Browser: parse url: %w", err)
|
|
}
|
|
|
|
// Resolve prompt override (input.prompt → param.prompt).
|
|
prompt := b.param.Prompt
|
|
if v, ok := inputs["prompt"].(string); ok && v != "" {
|
|
prompt = v
|
|
}
|
|
|
|
timeout := defaultBrowserTimeout
|
|
if b.param.Timeout > 0 {
|
|
timeout = time.Duration(b.param.Timeout) * time.Second
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Browser: build request: %w", err)
|
|
}
|
|
req.Header.Set("User-Agent", "ragflow-agent/1.0 (Browser component)")
|
|
// Encourage HTML / text responses; some servers sniff the UA and
|
|
// only return text/html for browser-shaped UAs.
|
|
req.Header.Set("Accept", "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.5")
|
|
|
|
client := &http.Client{
|
|
Timeout: timeout,
|
|
Transport: otelhttp.NewTransport(http.DefaultTransport),
|
|
// Don't follow redirects transparently — surface the final URL
|
|
// in outputs and let the orchestrator decide policy.
|
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|
if len(via) >= 10 {
|
|
return errors.New("Browser: too many redirects")
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Browser: do: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
limited := io.LimitReader(resp.Body, maxBrowserResponseBody)
|
|
bodyBytes, err := io.ReadAll(limited)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Browser: read body: %w", err)
|
|
}
|
|
|
|
finalURL := rawURL
|
|
if resp.Request != nil && resp.Request.URL != nil {
|
|
finalURL = resp.Request.URL.String()
|
|
}
|
|
|
|
content := string(bodyBytes)
|
|
// LLM summarization placeholder: if a model + prompt are both set,
|
|
// we mark the intent on the response. The actual chat call is left
|
|
// to Phase 5 to avoid re-implementing the LLM component's logic
|
|
// inline (which would split the model-resolution path in two).
|
|
modelID := b.param.ModelID
|
|
if v, ok := inputs["model_id"].(string); ok && v != "" {
|
|
modelID = v
|
|
}
|
|
if modelID != "" && prompt != "" {
|
|
// Phase 5 will add the actual LLM summarization call. For P4,
|
|
// we surface a hint that the model/prompt were considered by
|
|
// leaving the body unchanged and echoing the resolved
|
|
// model_id / prompt on the response (see outputs map below).
|
|
_ = content
|
|
}
|
|
|
|
return map[string]any{
|
|
"content": content,
|
|
"url": finalURL,
|
|
"status": resp.StatusCode,
|
|
"size": len(bodyBytes),
|
|
"model_id": modelID,
|
|
"prompt": prompt,
|
|
}, nil
|
|
}
|
|
|
|
// Stream mirrors Invoke; Browser is a single-shot HTTP fetch.
|
|
func (b *BrowserComponent) Stream(ctx context.Context, inputs map[string]any) (<-chan map[string]any, error) {
|
|
out, err := b.Invoke(ctx, inputs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ch := make(chan map[string]any, 1)
|
|
ch <- out
|
|
close(ch)
|
|
return ch, nil
|
|
}
|
|
|
|
// Inputs returns parameter metadata.
|
|
func (b *BrowserComponent) Inputs() map[string]string {
|
|
return map[string]string{
|
|
"model_id": "Optional LLM model id used to summarize the fetched page (Phase 5).",
|
|
"url": "Target URL; can be a {{...}} reference resolved upstream.",
|
|
"prompt": "Optional LLM prompt (e.g. \"summarize this page\"); used when model_id is set.",
|
|
"timeout": "Per-request timeout in seconds; default 30.",
|
|
}
|
|
}
|
|
|
|
// Outputs returns the response surface.
|
|
func (b *BrowserComponent) Outputs() map[string]string {
|
|
return map[string]string{
|
|
"content": "Response body (string, truncated at 16 MiB).",
|
|
"url": "Final URL after redirects.",
|
|
"status": "HTTP status code (int).",
|
|
"size": "Body size in bytes (int).",
|
|
"model_id": "Resolved LLM model id (empty when summarization is disabled).",
|
|
"prompt": "Resolved LLM prompt (echoed back for downstream nodes).",
|
|
}
|
|
}
|
|
|
|
func init() {
|
|
Register(componentNameBrowser, NewBrowserComponent)
|
|
}
|