//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package handler

// Webhook security helpers.
//
// These mirror api/apps/restful_apis/agent_api.py:1602-1810
// (validate_webhook_security + the six sub-validators) from the Python
// webhook handler. The Go port preserves the Python semantics exactly:
//
//   - max body size (with the 10 MB cap at agent_api.py:1652)
//   - IP whitelist (CIDR + exact match)
//   - rate limit (token bucket via redis.EvalTokenBucketStrict — strict
//     fail-closed; see redis.go)
//   - token auth (header check)
//   - basic auth (HTTP Basic)
//   - JWT (HS/RS256, audience/issuer/required-claims)
//
// All helpers return a Go error so the handler can surface the
// Python-shaped 102 envelope directly. Empty security config means
// "no checks" (matches agent_api.py:1607).

import (
	"context"
	"crypto/subtle"
	"fmt"
	"net"
	"strconv"
	"strings"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/golang-jwt/jwt/v5"

	rediscli "ragflow/internal/engine/redis"
)

const (
	// bytesPerKB / bytesPerMB use SI decimal units (1 KB = 1 000 B,
	// 1 MB = 1 000 000 B). Per user request, the Go port diverges
	// from python agent_api.py:1643 which uses 1 KB = 1 024 B
	// (binary). The header doc comments on parseMaxBodySize call
	// this out so future audits can find the divergence.
bytesPerKB int64 = 1000
	bytesPerMB int64 = bytesPerKB * 1000

	// webhookBodyMaxBytes is the hard ceiling above which a configured
	// max_body_size is rejected at config-validation time. It is stated
	// in decimal MB to match the kb/mb unit base above: 10 MB decimal =
	// 10 000 000 bytes. The python reference (agent_api.py:1652) uses
	// 10 * 1024 * 1024 = 10 485 760 bytes, so this ceiling is ~486 KB
	// stricter than python.
	webhookBodyMaxBytes int64 = 10 * bytesPerMB

	// webhookRateLimitTimeout is the redis call timeout used for the
	// strict token-bucket lookup. Short on purpose — a security gate
	// must not stall the request thread.
	webhookRateLimitTimeout = 500 * time.Millisecond

	// jwtReservedClaims is the python-parity set at agent_api.py:1800,
	// stored as a comma-separated list — required_claims must NOT name
	// any of these.
	jwtReservedClaims = "exp,sub,aud,iss,nbf,iat"
)

// validateWebhookSecurity runs every configured webhook security gate
// and returns the first failure. An empty or nil securityCfg is a no-op
// (matches agent_api.py:1607 "No security config → allowed by default").
//
// Gates run in the python-defined order, stopping at the first error:
//  1. validateMaxBodySize
//  2. validateIPWhitelist
//  3. validateRateLimit
//  4. validateAuth (dispatches on auth_type)
func validateWebhookSecurity(
	securityCfg map[string]any,
	c *gin.Context,
	canvasID string,
) error {
	if len(securityCfg) == 0 {
		return nil
	}

	// Ordered list of gates; the order is part of the contract.
	gates := []func() error{
		func() error { return validateMaxBodySize(c, securityCfg) },
		func() error { return validateIPWhitelist(c, securityCfg) },
		func() error { return validateRateLimit(canvasID, securityCfg) },
		func() error { return validateAuth(c, securityCfg) },
	}
	for _, gate := range gates {
		if err := gate(); err != nil {
			return err
		}
	}
	return nil
}

// validateMaxBodySize mirrors python agent_api.py:1636-1658.
//
// Format: "Nkb" | "Nmb" with N a positive integer (case-insensitive).
// Anything else is a config bug. The configured limit is capped at
// webhookBodyMaxBytes (10 MB) — exceeding that is also a config error,
// not silently raised. The actual request size is then compared
// against the parsed limit.
func validateMaxBodySize(c *gin.Context, cfg map[string]any) error { limit, err := parseMaxBodySize(cfg) if err != nil { return err } if limit <= 0 { return nil } contentLength := c.Request.ContentLength if contentLength < 0 { contentLength = 0 } if contentLength > limit { return fmt.Errorf("request body too large: %d > %d", contentLength, limit) } return nil } // parseMaxBodySize returns the byte limit configured by // `max_body_size` (with the 10 MB cap enforced) or 0 when no limit // is configured. The handler uses this to wrap c.Request.Body in // http.MaxBytesReader so the actual stream read is bounded — the // Content-Length header check alone is insufficient because a client // can advertise a small Content-Length and stream more. // // Unit base: 1 kb = 1 000 B, 1 mb = 1 000 000 B (SI decimal). Per // user request; note this DIVERGES from the python reference // (agent_api.py:1643 uses binary 1 KB = 1 024 B). // // Overflow guard (CodeRabbit PR review #3 on PR #16403): a very // large configured n (e.g. "10000000mb") would otherwise overflow // `n * bytesPerMB` and wrap to a small positive number, bypassing // the 10 MB cap. We check the parsed number against the cap before // multiplying. 
func parseMaxBodySize(cfg map[string]any) (int64, error) { raw, ok := cfg["max_body_size"].(string) if !ok || raw == "" { return 0, nil } sizeStr := strings.ToLower(strings.TrimSpace(raw)) var limit int64 switch { case strings.HasSuffix(sizeStr, "kb"): n, err := strconv.ParseInt(strings.TrimSuffix(sizeStr, "kb"), 10, 64) if err != nil || n <= 0 { return 0, fmt.Errorf("invalid max_body_size format") } if n > webhookBodyMaxBytes/bytesPerKB { return 0, fmt.Errorf("max_body_size exceeds maximum allowed size (10MB)") } limit = n * bytesPerKB case strings.HasSuffix(sizeStr, "mb"): n, err := strconv.ParseInt(strings.TrimSuffix(sizeStr, "mb"), 10, 64) if err != nil || n <= 0 { return 0, fmt.Errorf("invalid max_body_size format") } if n > webhookBodyMaxBytes/bytesPerMB { return 0, fmt.Errorf("max_body_size exceeds maximum allowed size (10MB)") } limit = n * bytesPerMB default: return 0, fmt.Errorf("invalid max_body_size format") } if limit > webhookBodyMaxBytes { return 0, fmt.Errorf("max_body_size exceeds maximum allowed size (10MB)") } return limit, nil } // validateIPWhitelist mirrors python agent_api.py:1660-1679. Empty // list → allow. Supports CIDR ("10.0.0.0/8") and exact ("1.2.3.4"). // The client IP comes from gin's c.ClientIP() which honours // X-Forwarded-For when trusted proxies are configured. func validateIPWhitelist(c *gin.Context, cfg map[string]any) error { whitelist, _ := cfg["ip_whitelist"].([]any) if len(whitelist) == 0 { return nil } clientIP := c.ClientIP() for _, raw := range whitelist { rule, _ := raw.(string) if rule == "" { continue } if strings.Contains(rule, "/") { // CIDR _, ipNet, err := net.ParseCIDR(rule) if err != nil { continue } addr := net.ParseIP(clientIP) if addr != nil && ipNet.Contains(addr) { return nil } continue } // Exact match if clientIP == rule { return nil } } return fmt.Errorf("IP %s is not allowed by whitelist", clientIP) } // validateRateLimit mirrors python agent_api.py:1681-1723. 
// // Window mapping (matches agent_api.py:1692-1697): // // second → 1s, minute → 60s, hour → 3600s, day → 86400s. // // Unknown per → error (NOT silently fall through). // // Strict fail-closed: any Redis error → error. The webhook handler // surfaces this as 102 so an operator notices a misconfiguration. func validateRateLimit(canvasID string, cfg map[string]any) error { rawRL, ok := cfg["rate_limit"].(map[string]any) if !ok || len(rawRL) == 0 { return nil } limitF, ok := rawRL["limit"].(float64) if !ok { // JSON numbers often come back as float64; try int as well. if limitI, ok2 := rawRL["limit"].(int); ok2 { limitF = float64(limitI) ok = true } } if !ok || limitF <= 0 { return fmt.Errorf("rate_limit.limit must be > 0") } per, _ := rawRL["per"].(string) if per == "" { per = "minute" } var window float64 switch per { case "second": window = 1 case "minute": window = 60 case "hour": window = 3600 case "day": window = 86400 default: return fmt.Errorf("invalid rate_limit.per: %s", per) } key := fmt.Sprintf("rl:tb:%s", canvasID) ctx, cancel := context.WithTimeout(context.Background(), webhookRateLimitTimeout) defer cancel() rdb := rediscli.Get() if rdb == nil { return fmt.Errorf("rate limit error: redis not initialised") } allowed, err := rdb.EvalTokenBucketStrict(ctx, key, limitF, limitF/window) if err != nil { return fmt.Errorf("rate limit error: %s", err.Error()) } if !allowed { return fmt.Errorf("too many requests (rate limit exceeded)") } return nil } // validateAuth dispatches on auth_type. Empty cfg or auth_type=="none" // → allow (matches agent_api.py:1621). 
func validateAuth(c *gin.Context, cfg map[string]any) error { authType, _ := cfg["auth_type"].(string) if authType == "" || authType == "none" { return nil } switch authType { case "token": return validateTokenAuth(c, cfg) case "basic": return validateBasicAuth(c, cfg) case "jwt": return validateJWTAuth(c, cfg) } return fmt.Errorf("unsupported auth_type: %s", authType) } // validateTokenAuth mirrors python agent_api.py:1725-1733. // // An empty configured `token_value` previously meant "accept any // request without that header". We now require both header and // value to be non-empty configured secrets, otherwise the request // is rejected as misconfigured. CodeRabbit PR review #4. func validateTokenAuth(c *gin.Context, cfg map[string]any) error { rawToken, _ := cfg["token"].(map[string]any) if rawToken == nil { return fmt.Errorf("Invalid token authentication") } header, _ := rawToken["token_header"].(string) want, _ := rawToken["token_value"].(string) if header == "" || want == "" { return fmt.Errorf("Invalid token authentication") } if c.GetHeader(header) != want { return fmt.Errorf("Invalid token authentication") } return nil } // validateBasicAuth mirrors python agent_api.py:1735-1743. We use // gin's c.Request.BasicAuth() which parses the Authorization header // and returns the (user, pass, ok) triple. Empty configured // username/password are now rejected (CodeRabbit PR review #4). func validateBasicAuth(c *gin.Context, cfg map[string]any) error { rawBasic, _ := cfg["basic_auth"].(map[string]any) if rawBasic == nil { return fmt.Errorf("Invalid Basic Auth credentials") } username, _ := rawBasic["username"].(string) password, _ := rawBasic["password"].(string) if username == "" || password == "" { return fmt.Errorf("Invalid Basic Auth credentials") } u, p, ok := c.Request.BasicAuth() if !ok || u != username || p != password { return fmt.Errorf("Invalid Basic Auth credentials") } return nil } // validateJWTAuth mirrors python agent_api.py:1745-1809. 
// // Algorithm defaults to HS256. audience / issuer are validated only // when configured. required_claims rejects reserved JWT claims // (exp sub aud iss nbf iat) and any missing claims. // // Algorithms: // - HS256 / HS384 / HS512 → secret is a shared HMAC key. // - RS256 / RS384 / RS512 → secret is a PEM-encoded RSA public key. // - ES256 / ES384 / ES512 → secret is a PEM-encoded EC public key. // // The Python reference uses the same `secret` field for all three // families (the python jwt library is happy to take either a string // or a PEM block); we mirror that with one `secret` config slot and // dispatch on the algorithm. func validateJWTAuth(c *gin.Context, cfg map[string]any) error { rawJWT, _ := cfg["jwt"].(map[string]any) if rawJWT == nil { return fmt.Errorf("jwt secret not configured") } secret, _ := rawJWT["secret"].(string) if secret == "" { return fmt.Errorf("jwt secret not configured") } authHeader := c.GetHeader("Authorization") const prefix = "Bearer " if !strings.HasPrefix(authHeader, prefix) { return fmt.Errorf("missing bearer token") } tokenStr := strings.TrimSpace(authHeader[len(prefix):]) if tokenStr == "" { return fmt.Errorf("empty bearer token") } alg, _ := rawJWT["algorithm"].(string) if alg == "" { alg = "HS256" } alg = strings.ToUpper(alg) // Build the parser options. parserOpts := []jwt.ParserOption{ jwt.WithValidMethods([]string{alg}), } if aud, ok := rawJWT["audience"].(string); ok && aud != "" { parserOpts = append(parserOpts, jwt.WithAudience(aud)) } if iss, ok := rawJWT["issuer"].(string); ok && iss != "" { parserOpts = append(parserOpts, jwt.WithIssuer(iss)) } keyFunc, keyErr := jwtKeyFunc(alg, secret) if keyErr != nil { return keyErr } token, err := jwt.Parse(tokenStr, keyFunc, parserOpts...) 
if err != nil { return fmt.Errorf("invalid jwt: %s", err.Error()) } if !token.Valid { return fmt.Errorf("invalid jwt") } claims, ok := token.Claims.(jwt.MapClaims) if !ok { return fmt.Errorf("invalid jwt claims") } // Required claims validation (mirrors agent_api.py:1787-1808). required := collectStringSlice(rawJWT["required_claims"]) reserved := splitCSV(jwtReservedClaims) for _, claim := range required { if contains(reserved, claim) { return fmt.Errorf("reserved jwt claim cannot be required: %s", claim) } if _, present := claims[claim]; !present { return fmt.Errorf("missing jwt claim: %s", claim) } } return nil } // jwtKeyFunc returns the verification-key closure that jwt.Parse // invokes. The dispatch mirrors the python jwt library: a string secret // is treated as an HMAC key for HS* algorithms, and as a PEM block for // RS*/ES* algorithms. func jwtKeyFunc(alg, secret string) (jwt.Keyfunc, error) { switch alg { case "HS256", "HS384", "HS512": return func(_ *jwt.Token) (any, error) { return []byte(secret), nil }, nil case "RS256", "RS384", "RS512": pub, err := jwt.ParseRSAPublicKeyFromPEM([]byte(secret)) if err != nil { return nil, fmt.Errorf("jwt rsa public key: %s", err.Error()) } return func(_ *jwt.Token) (any, error) { return pub, nil }, nil case "ES256", "ES384", "ES512": pub, err := jwt.ParseECPublicKeyFromPEM([]byte(secret)) if err != nil { return nil, fmt.Errorf("jwt ec public key: %s", err.Error()) } return func(_ *jwt.Token) (any, error) { return pub, nil }, nil } return nil, fmt.Errorf("unsupported jwt algorithm: %s", alg) } // collectStringSlice accepts the python-shaped `required_claims` value: // a single string OR a list/tuple/set. Mirrors agent_api.py:1788-1798. 
func collectStringSlice(v any) []string {
	// keep appends the trimmed form of raw to dst unless it is blank.
	keep := func(dst []string, raw string) []string {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			return append(dst, trimmed)
		}
		return dst
	}

	switch typed := v.(type) {
	case string:
		// A blank string yields nil, otherwise a one-element slice.
		return keep(nil, typed)
	case []string:
		result := make([]string, 0, len(typed))
		for i := range typed {
			result = keep(result, typed[i])
		}
		return result
	case []any:
		// Elements that are not strings are ignored.
		result := make([]string, 0, len(typed))
		for _, elem := range typed {
			if str, isString := elem.(string); isString {
				result = keep(result, str)
			}
		}
		return result
	default:
		return nil
	}
}

// splitCSV splits s on commas, trims surrounding whitespace from each
// field and drops fields that are empty after trimming. The result is
// never nil, even for an empty input.
func splitCSV(s string) []string {
	fields := make([]string, 0, strings.Count(s, ",")+1)
	for rest, more := s, true; more; {
		var head string
		head, rest, more = strings.Cut(rest, ",")
		if head = strings.TrimSpace(head); head != "" {
			fields = append(fields, head)
		}
	}
	return fields
}