Files
ragflow/internal/utility/ssrf.go
Zhichang Yu 4c54cefd29 Port 14 upstream agent security / correctness fixes to Go canvas (#16455)
Mirrors 14 merged upstream PRs into the Go agent port.

PRs ported:
  - #15609 ExeSQL SSRF guard + DNS pin
  - #15436 HTTP timeout on external API tools
  - #16363 be_output restore + DeepL error path
  - #15644 switch no longer matches empty condition
  - #15374 session_id bind to path agent_id (DAO idor guard)
  - #16169 sandbox artifact ownership gate
  - #15457 tenant ownership on agentbots
  - #15145 rerun agent document access check
  - #15446 thinking switch (component portion; provider policy lives in
    internal/llm)
  - #15426 Invoke URL/proxy SSRF + DNS pin + no-redirects
  - #15238 agentbot thinking-logs beta endpoint
  - #14589 UserFillUp SSE event propagation
  - #14890 anonymous webhook opt-in
  - #15068 PipelineChunker new component (text/file_ref/parser_id
    dispatch; file-format extraction is a follow-up)

40 files, +2355 / -58 lines. 33 new tests, all targeted package suites
pass (1721 + 4 skipped); 1 pre-existing flaky test unrelated.
2026-06-30 16:28:48 +08:00

214 lines
6.9 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package utility
import (
"context"
"fmt"
"net"
"net/http"
"net/url"
"sort"
"strings"
"time"
)
var (
	// AllowedURLSchemes lists the URL schemes that AssertURLSafe accepts.
	AllowedURLSchemes = []string{"http", "https"}

	// LookupHost is the hostname resolver used by this package. It is a
	// variable rather than a direct call so tests can substitute a stub.
	LookupHost = net.LookupHost
)
// AllowAnyHostForTest is a test-only switch. While it is true,
// AssertURLSafe no longer rejects addresses that fail the public-IP
// check; the hostname is still resolved and the first address is still
// returned. Production code MUST leave it at its zero value (false).
// Tests that need to reach a local httptest server turn it on and
// restore it in t.Cleanup.
//
// History: an earlier version used the ALLOW_ANY_HOST environment
// variable, a live runtime toggle that any operator could flip to
// disable the SSRF guard globally, including for the Invoke component.
// Following PR review round 6 (Major #3) the switch now lives in
// process memory only, so neither an env var nor a deployment mistake
// can enable it. The "_ForTest" suffix signals that production code
// must never touch it.
var AllowAnyHostForTest bool

// allowAnyHost reports the current value of the test-only override.
// It exists so call sites do not need to reference the exported
// variable by name.
func allowAnyHost() bool {
	enabled := AllowAnyHostForTest
	return enabled
}
// AssertURLSafe parses rawURL (after trimming surrounding whitespace) and
// rejects it when the scheme is not in AllowedURLSchemes, the host is
// missing, the host cannot be resolved, or any resolved address is not
// globally routable (private, loopback, link-local, multicast, reserved).
//
// On success it returns the URL's hostname and the first resolved address,
// so callers can pin the connection to that address and prevent DNS
// rebinding between validation and the actual TCP connection.
//
// When the test-only override reported by allowAnyHost is on, the
// routability check is skipped; resolution and address parsing still run.
//
// The resolver failure is wrapped with %w, so callers can inspect the
// underlying error with errors.Is / errors.As. The message wording is kept
// as-is because it mirrors common/ssrf_guard.py:assert_url_is_safe.
func AssertURLSafe(rawURL string) (hostname, resolvedIP string, err error) {
	parsed, perr := url.Parse(strings.TrimSpace(rawURL))
	if perr != nil {
		return "", "", fmt.Errorf("Invalid url.")
	}

	scheme := strings.ToLower(parsed.Scheme)
	if !schemeAllowed(scheme) {
		// Sort a copy so the message is stable and the package-level
		// slice is never reordered.
		sorted := append([]string(nil), AllowedURLSchemes...)
		sort.Strings(sorted)
		return "", "", fmt.Errorf("Disallowed URL scheme: '%s'. Only %v are allowed.", scheme, sorted)
	}

	hostname = parsed.Hostname()
	if hostname == "" {
		return "", "", fmt.Errorf("URL is missing a host.")
	}

	allowAny := allowAnyHost()

	addrs, lookupErr := LookupHost(hostname)
	if lookupErr != nil {
		return "", "", fmt.Errorf("Could not resolve hostname '%s': %w", hostname, lookupErr)
	}
	if len(addrs) == 0 {
		return "", "", fmt.Errorf("Hostname '%s' resolved to no addresses.", hostname)
	}

	// Every address must pass: one non-public record is enough to reject
	// the URL, regardless of its position in the answer.
	for _, addr := range addrs {
		ip := net.ParseIP(addr)
		if ip == nil {
			return "", "", fmt.Errorf("Could not parse resolved address '%s' for hostname '%s'.", addr, hostname)
		}
		if !allowAny && !isGlobalIP(effectiveIP(ip)) {
			return "", "", fmt.Errorf("URL resolves to a non-public address (%s), which is not allowed.", ip.String())
		}
		if resolvedIP == "" {
			resolvedIP = ip.String()
		}
	}
	return hostname, resolvedIP, nil
}
// schemeAllowed reports whether scheme is exactly equal to one of the
// entries in AllowedURLSchemes. The comparison is case-sensitive.
func schemeAllowed(scheme string) bool {
	for i := 0; i < len(AllowedURLSchemes); i++ {
		if AllowedURLSchemes[i] == scheme {
			return true
		}
	}
	return false
}
// effectiveIP returns the 4-byte form of ip when it has one, which covers
// IPv4-mapped IPv6 addresses such as ::ffff:127.0.0.1. Any other address
// is returned unchanged. This keeps an IPv4-mapped spelling of a private
// host from slipping past the routability check.
func effectiveIP(ip net.IP) net.IP {
	unwrapped := ip.To4()
	if unwrapped == nil {
		return ip
	}
	return unwrapped
}
// isGlobalIP is intended to mirror Python's ipaddress.IPv*Address.is_global:
// it reports whether ip is a globally routable unicast address. It returns
// false for nil or malformed-length input and for unspecified, loopback,
// multicast, link-local and private addresses (whatever net.IP.IsPrivate
// covers), plus the special-purpose ranges checked explicitly below.
//
// IPv4 and IPv4-mapped IPv6 input are both evaluated against the IPv4 rules.
func isGlobalIP(ip net.IP) bool {
	if ip == nil || ip.IsUnspecified() || ip.IsLoopback() || ip.IsMulticast() ||
		ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() ||
		ip.IsInterfaceLocalMulticast() || ip.IsPrivate() {
		return false
	}

	if v4 := ip.To4(); v4 != nil {
		switch {
		case v4[0] == 0:
			// 0.0.0.0/8 "this network". IsUnspecified only matches
			// 0.0.0.0 itself, so the rest of the block needs its own check.
			return false
		case v4[0] == 100 && v4[1]&0xC0 == 64:
			// CGNAT 100.64.0.0/10; IsPrivate only covers RFC 1918 space.
			return false
		case v4[0] == 192 && v4[1] == 0 && v4[2] == 0:
			// 192.0.0.0/24 reserved for IETF protocol assignments.
			return false
		case v4[0] == 192 && v4[1] == 0 && v4[2] == 2,
			v4[0] == 198 && v4[1] == 51 && v4[2] == 100,
			v4[0] == 203 && v4[1] == 0 && v4[2] == 113:
			// 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24
			// documentation ranges (TEST-NET-1/2/3).
			return false
		case v4[0] == 198 && (v4[1] == 18 || v4[1] == 19):
			// 198.18.0.0/15 benchmarking.
			return false
		case v4[0] >= 240:
			// 240.0.0.0/4 reserved, which also covers the limited
			// broadcast address 255.255.255.255.
			return false
		}
		return true
	}

	v6 := ip.To16()
	if v6 == nil {
		// Neither a 4-byte nor a 16-byte address: fail closed instead of
		// treating a malformed value as routable.
		return false
	}
	// 2001:db8::/32 documentation prefix.
	if v6[0] == 0x20 && v6[1] == 0x01 && v6[2] == 0x0d && v6[3] == 0xb8 {
		return false
	}
	// 100::/64 discard-only address block.
	if v6[0] == 0x01 && v6[1] == 0x00 && allZero(v6[2:8]) {
		return false
	}
	return true
}

// allZero reports whether every byte of b is zero. An empty or nil slice
// yields true.
func allZero(b []byte) bool {
	for _, x := range b {
		if x != 0 {
			return false
		}
	}
	return true
}
// PinnedHTTPClient returns an HTTP client whose Transport redirects every
// outbound dial for hostname:port to resolvedIP:port, closing the TOCTOU
// window between AssertURLSafe and the actual TCP connection. The pin is
// scoped to the returned client only.
//
// The host comparison is case-insensitive, because DNS names are: a
// differently-cased spelling of the validated host must not fall through
// to a fresh, unvalidated resolution.
//
// If resolvedIP is empty, or the dialed host is not hostname, the dial is
// passed through unchanged and is NOT validated here. Callers that must
// prevent connections to other hosts (for example when following
// redirects) have to enforce that themselves.
//
// timeout is applied to the dial, the TLS handshake, the wait for response
// headers, and the whole request via http.Client.Timeout.
func PinnedHTTPClient(hostname, resolvedIP string, timeout time.Duration) *http.Client {
	dialer := &net.Dialer{
		Timeout:   timeout,
		KeepAlive: 30 * time.Second,
	}
	transport := &http.Transport{
		// Disable environment proxy: HTTP_PROXY / HTTPS_PROXY would route
		// the connection through the proxy host instead of the pinned
		// resolvedIP, bypassing the SSRF guard.
		Proxy: nil,
		DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
			host, port, splitErr := net.SplitHostPort(addr)
			if splitErr == nil && resolvedIP != "" && strings.EqualFold(host, hostname) {
				return dialer.DialContext(ctx, network, net.JoinHostPort(resolvedIP, port))
			}
			return dialer.DialContext(ctx, network, addr)
		},
		TLSHandshakeTimeout:   timeout,
		ResponseHeaderTimeout: timeout,
		ExpectContinueTimeout: 1 * time.Second,
		ForceAttemptHTTP2:     false,
	}
	return &http.Client{
		Transport: transport,
		Timeout:   timeout,
	}
}