Files
ragflow/internal/agent/component/invoke_test.go
Zhichang Yu 4c54cefd29 Port 14 upstream agent security / correctness fixes to Go canvas (#16455)
Mirrors 14 merged upstream PRs into the Go agent port.

PRs ported:
  - #15609 ExeSQL SSRF guard + DNS pin
  - #15436 HTTP timeout on external API tools
  - #16363 be_output restore + DeepL error path
  - #15644 switch no longer matches empty condition
  - #15374 session_id bind to path agent_id (DAO idor guard)
  - #16169 sandbox artifact ownership gate
  - #15457 tenant ownership on agentbots
  - #15145 rerun agent document access check
- #15446 thinking switch (component portion; provider policy lives in
internal/llm)
  - #15426 Invoke URL/proxy SSRF + DNS pin + no-redirects
  - #15238 agentbot thinking-logs beta endpoint
  - #14589 UserFillUp SSE event propagation
  - #14890 anonymous webhook opt-in
- #15068 PipelineChunker new component (text/file_ref/parser_id
dispatch; file-format extraction is a follow-up)

40 files, +2355 / -58 lines. 33 new tests, all targeted package suites
pass (1721 + 4 skipped); 1 pre-existing flaky test unrelated.
2026-06-30 16:28:48 +08:00

404 lines
14 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package component
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"ragflow/internal/utility"
)
// Test setup: enable the test-only SSRF bypass for tests in this
// file (the happy-path httptest server lives on 127.0.0.1, which
// the production guard rejects). The bypass is now a process-
// memory boolean (utility.AllowAnyHostForTest) instead of an
// env var — the previous form (ALLOW_ANY_HOST env) was a live
// runtime toggle any operator could flip to disable the guard
// globally. PR review round 6, Major #3.
//
// Each test that wants the production guard back resets it to
// false in its body; we don't blanket-disable here because some
// tests below rely on the bypass being on.
func setupAllowAnyHost(t *testing.T, enabled bool) {
t.Helper()
prev := utility.AllowAnyHostForTest
utility.AllowAnyHostForTest = enabled
t.Cleanup(func() { utility.AllowAnyHostForTest = prev })
}
// TestInvoke_GET exercises the happy path: a GET request to a stub
// server returns the canned body, and the response map carries the
// expected status / body / headers.
func TestInvoke_GET(t *testing.T) {
setupAllowAnyHost(t, true)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
t.Errorf("server: got method %q, want GET", r.Method)
}
w.Header().Set("X-Test", "ok")
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("hello"))
}))
defer srv.Close()
c, _ := NewInvokeComponent(nil)
out, err := c.Invoke(context.Background(), map[string]any{
"method": "GET",
"url": srv.URL,
})
if err != nil {
t.Fatalf("Invoke: %v", err)
}
if status, _ := out["status"].(int); status != http.StatusOK {
t.Errorf("status: got %d, want 200", status)
}
if body, _ := out["body"].(string); body != "hello" {
t.Errorf("body: got %q, want %q", body, "hello")
}
hdr, _ := out["headers"].(map[string]string)
if hdr["X-Test"] != "ok" {
t.Errorf("headers[X-Test]: got %q, want %q", hdr["X-Test"], "ok")
}
}
// TestInvoke_POST verifies that POST with a body echoes the body back
// from the server. The Content-Type defaults to application/json when
// not specified; we confirm that default in the test.
func TestInvoke_POST(t *testing.T) {
setupAllowAnyHost(t, true)
var seenCT, seenBody string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
seenCT = r.Header.Get("Content-Type")
b, _ := io.ReadAll(r.Body)
seenBody = string(b)
w.WriteHeader(http.StatusCreated)
_, _ = w.Write([]byte("echo:" + seenBody))
}))
defer srv.Close()
c, _ := NewInvokeComponent(nil)
out, err := c.Invoke(context.Background(), map[string]any{
"method": "POST",
"url": srv.URL,
"body": `{"k":"v"}`,
})
if err != nil {
t.Fatalf("Invoke: %v", err)
}
if status, _ := out["status"].(int); status != http.StatusCreated {
t.Errorf("status: got %d, want 201", status)
}
if seenCT != "application/json" {
t.Errorf("server saw Content-Type %q, want application/json (default)", seenCT)
}
if seenBody != `{"k":"v"}` {
t.Errorf("server saw body %q, want %q", seenBody, `{"k":"v"}`)
}
if body, _ := out["body"].(string); body != `echo:{"k":"v"}` {
t.Errorf("body: got %q, want %q", body, `echo:{"k":"v"}`)
}
}
// TestInvoke_BadMethod ensures invalid HTTP methods are rejected
// before any network I/O happens.
func TestInvoke_BadMethod(t *testing.T) {
setupAllowAnyHost(t, true)
c, _ := NewInvokeComponent(nil)
_, err := c.Invoke(context.Background(), map[string]any{
"method": "PATCH",
"url": "http://localhost:1",
})
if err == nil {
t.Fatal("expected error for PATCH method, got nil")
}
if !strings.Contains(err.Error(), "invalid method") {
t.Errorf("error %q should mention invalid method", err.Error())
}
}
// TestInvoke_MissingURL confirms url is required.
func TestInvoke_MissingURL(t *testing.T) {
setupAllowAnyHost(t, true)
c, _ := NewInvokeComponent(nil)
_, err := c.Invoke(context.Background(), map[string]any{
"method": "GET",
})
if err == nil {
t.Fatal("expected error for missing url, got nil")
}
if !strings.Contains(err.Error(), "url is required") {
t.Errorf("error %q should mention url is required", err.Error())
}
}
// TestInvoke_SSRFGuard_BlocksLoopback mirrors PR #15426: when
// AllowAnyHostForTest is unset (production shape), the Invoke
// component must reject loopback / link-local / RFC1918 URLs
// BEFORE any HTTP request is made. The test sets up an
// httptest server on 127.0.0.1, points Invoke at it, and
// asserts the request never reached the server — the function
// returns an _ERROR output instead.
func TestInvoke_SSRFGuard_BlocksLoopback(t *testing.T) {
// Force the production guard on (override any inherited state).
setupAllowAnyHost(t, false)
var serverHit bool
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
serverHit = true
}))
defer srv.Close()
c, _ := NewInvokeComponent(nil)
out, err := c.Invoke(context.Background(), map[string]any{
"method": "GET",
"url": srv.URL,
})
if err != nil {
t.Fatalf("Invoke: %v", err)
}
if serverHit {
t.Errorf("server was hit despite loopback URL; SSRF guard bypassed")
}
if got, _ := out["_ERROR"].(string); got != "URL not valid" {
t.Errorf("_ERROR = %q, want %q", got, "URL not valid")
}
if status, _ := out["status"].(int); status != 0 {
t.Errorf("status = %d, want 0 on SSRF block", status)
}
}
// TestInvoke_SSRFGuard_BlocksMetadataIP covers the cloud
// metadata endpoint (169.254.169.254) case.
func TestInvoke_SSRFGuard_BlocksMetadataIP(t *testing.T) {
setupAllowAnyHost(t, false)
var serverHit bool
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
serverHit = true
}))
defer srv.Close()
c, _ := NewInvokeComponent(nil)
out, err := c.Invoke(context.Background(), map[string]any{
"method": "GET",
"url": "http://169.254.169.254/latest/meta-data/",
})
if err != nil {
t.Fatalf("Invoke: %v", err)
}
if serverHit {
t.Errorf("metadata IP was dialed; SSRF guard bypassed")
}
if got, _ := out["_ERROR"].(string); got != "URL not valid" {
t.Errorf("_ERROR = %q, want %q", got, "URL not valid")
}
}
// TestInvoke_SSRFGuard_BlocksProxy mirrors the python
// assert_url_is_safe(proxy_url) check. The proxy URL itself
// must be validated independently of the target URL.
func TestInvoke_SSRFGuard_BlocksProxy(t *testing.T) {
setupAllowAnyHost(t, false)
var serverHit bool
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
serverHit = true
}))
defer srv.Close()
c, _ := NewInvokeComponent(nil)
// The proxy is a loopback URL; the SSRF guard must reject it
// regardless of the (presumed-public) target URL. Assert
// both that the server was never reached AND that the guard
// returned the canonical `_ERROR="URL not valid"` payload,
// so a regression that silently lets the request through
// (without a side-effect on the local server) is still
// caught. PR review round 5 / duplicate fix from the
// serverHit-only assertion in the earlier review.
out, err := c.Invoke(context.Background(), map[string]any{
"method": "GET",
"url": "https://example.com/api",
"proxy": "http://127.0.0.1:8080",
})
if err != nil {
t.Fatalf("Invoke: %v", err)
}
if serverHit {
t.Errorf("server hit despite unsafe proxy; guard bypassed")
}
if got, _ := out["_ERROR"].(string); got != "URL not valid" {
t.Errorf("unsafe proxy: _ERROR = %q, want %q", got, "URL not valid")
}
}
// TestInvoke_NoRedirects_NotFollowed asserts the
// CheckRedirect policy — a 302 response from the upstream
// must be returned to the caller (with the Location header),
// not followed. This closes the bypass window where a public
// host could 302-redirect to a private one.
func TestInvoke_NoRedirects_NotFollowed(t *testing.T) {
// Need the bypass to talk to the local httptest server.
setupAllowAnyHost(t, true)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Location", "http://127.0.0.1:1/secret")
w.WriteHeader(http.StatusFound)
}))
defer srv.Close()
c, _ := NewInvokeComponent(nil)
out, err := c.Invoke(context.Background(), map[string]any{
"method": "GET",
"url": srv.URL,
})
if err != nil {
t.Fatalf("Invoke: %v", err)
}
if status, _ := out["status"].(int); status != http.StatusFound {
t.Errorf("status = %d, want 302 (no follow)", status)
}
hdr, _ := out["headers"].(map[string]string)
if hdr["Location"] != "http://127.0.0.1:1/secret" {
t.Errorf("Location header not preserved: %v", hdr)
}
}
// TestInvoke_ProxyDNSPin guards the regression the user
// caught in code review: the proxy path's previous
// implementation only validated the proxy URL, but did not
// pin the dial target. The Go http.Transport dials the
// proxy host using its own dialer, which re-resolves the
// hostname at connect time — opening a TOCTOU window the
// SSRF guard was supposed to close.
//
// The fix: when a proxy is configured, the Invoke component
// wraps the proxy transport with a custom DialContext that
// intercepts the proxy-host dial and replaces the target
// with the validated public IP. The connection thus goes
// to the IP we validated, even if a subsequent DNS lookup
// returns a different answer.
//
// This test uses a public IP (8.8.8.8) as the proxy
// "resolved IP" so the dial target is well-known. The
// proxy URL itself is unreachable on the test network, so
// the dial will fail — but with an error that mentions
// the IP we dialled, not the original hostname. That
// proves the pinning path is active. We un-set
// ALLOW_ANY_HOST so the SSRF guard accepts a public-IP
// URL but the dial still happens through our code path.
//
// Target host is a literal IP (8.8.8.8) — proxy mode
// fail-closes for hostname targets because the proxy
// performs its own DNS resolution at connect time, which
// would re-open the rebinding window. PR review round 5,
// Major #3.
func TestInvoke_ProxyDNSPin(t *testing.T) {
setupAllowAnyHost(t, false)
// The validated proxy IP we will pin the dial to.
// 192.88.99.1 is the 6to4 anycast prefix — a real public
// IP that the SSRF guard accepts (not in any block-list
// range) but is highly unlikely to be listening on port
// 9999 in the test environment, so the dial fails fast
// with "connection refused" carrying the IP we pinned.
// Earlier versions used 1.1.1.1, but Cloudflare's edge
// network has started responding on 9999, so we switched
// to a less-likely-to-listen anycast prefix.
const pinnedProxyIP = "192.88.99.1"
// Build a small Invoke call with a proxy URL whose
// hostname will resolve via SSRF (we override the
// resolver below). The SSRF guard validates against
// the validated public IP, then the dialer is
// expected to use that IP — even if the hostname
// "rebinds" to a different answer afterwards.
// We achieve "rebinding" by stubbing the DNS lookup
// to return a different IP on a second call.
originalLookup := utility.LookupHost
utility.LookupHost = func(host string) ([]string, error) {
// Always return a public IP so SSRF accepts.
return []string{pinnedProxyIP}, nil
}
t.Cleanup(func() { utility.LookupHost = originalLookup })
// We expect the dial to fail (no proxy server at
// 1.1.1.1:9999). The error message tells us whether
// the pin was active: with the fix, the dial targets
// 1.1.1.1:9999; without the fix, the Go transport
// would have re-resolved the hostname and dialed a
// different IP (or refused to dial because the
// stubbed hostname is unreachable).
c, _ := NewInvokeComponent(nil)
// Tight per-call timeout so the test fails fast — the request
// will hang waiting for the unreachable proxy (1.1.1.1:9999)
// to respond, and the default 30s client timeout would make
// this test take 30s on every run.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
_, err := c.Invoke(ctx, map[string]any{
"method": "GET",
"url": "http://8.8.8.8/api",
"proxy": "http://proxy.test.invalid:9999",
"timeout": 2,
})
if err == nil {
t.Fatal("expected dial error (no proxy listening), got nil")
}
// The dial error must reference 1.1.1.1:9999 (the
// pinned IP) — NOT the unresolved proxy.test.invalid
// hostname, which would prove the dialer fell through
// to the default resolver.
if !strings.Contains(err.Error(), pinnedProxyIP+":9999") {
t.Fatalf("dial error = %v; want pinned proxy IP %s:9999 "+
"(connection-refused is acceptable; an absent IP means "+
"the dialer fell through to the default resolver and the "+
"pinning regression went undetected)", err, pinnedProxyIP)
}
}
// TestInvoke_ProxyRejectsHostnameTarget pins PR review round 5,
// Major #3: proxy mode refuses hostname targets because the
// proxy performs its own DNS resolution at connect time, which
// would re-open the SSRF/DNS-rebinding window the SSRF guard
// just closed. The handler must return an _ERROR envelope (not
// a Go error) so the canvas can route around the failure.
func TestInvoke_ProxyRejectsHostnameTarget(t *testing.T) {
setupAllowAnyHost(t, false)
c, _ := NewInvokeComponent(nil)
out, err := c.Invoke(context.Background(), map[string]any{
"method": "GET",
"url": "http://example.com/api",
"proxy": "http://proxy.example.invalid:9999",
})
if err != nil {
t.Fatalf("Invoke: want nil Go error (canvas routes around _ERROR), got %v", err)
}
if got, _ := out["_ERROR"].(string); got != "URL not valid" {
t.Errorf("hostname+proxy target: _ERROR = %q, want %q", got, "URL not valid")
}
}