mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Harden closed-advisory fixes (#16409)
## Summary
- harden reopened advisory fixes across REST connector, invoke, document downloads, and markdown rendering
- add targeted regression coverage for redirect-safe SSRF handling, invoke SSRF checks, document access control, and markdown sanitization
- verify each referenced GHSA against the original GitHub advisory text and align the closed-advisory plan with the implemented remediation

## What changed
- add tenant access checks to document download endpoints to avoid cross-tenant document disclosure
- add per-hop SSRF validation, DNS pinning, redirect handling, and redirect limits to the REST API connector
- ensure invoke requests validate and pin the resolved host and never follow redirects implicitly
- keep the generic rate-limited request path wrapped, not just GET and POST helpers
- sanitize markdown HTML before rendering in the highlight markdown component

## Validation
- `cd web && npm test -- --runInBand src/components/highlight-markdown/__tests__/index.test.tsx`
- `.venv/bin/python -m pytest -q test/unit_test/data_source/test_rest_api_connector.py`
- targeted `test/testcases/test_web_api/...` unit additions were reviewed, but the suite cannot be executed end-to-end in this environment because parent `test/testcases/conftest.py` requires a local service on `127.0.0.1:9380`

## Notes
- all GHSA entries referenced by the plan were checked against the original GitHub advisory text, not sampled
- the closed-advisory plan document was updated locally during review, but is intentionally not included in this PR
This commit is contained in:
@@ -13,7 +13,7 @@ import re
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, Generator, Iterable, List, Mapping, Optional
|
||||
from urllib.parse import parse_qs, urlparse, urlunparse
|
||||
from urllib.parse import parse_qs, urljoin, urlparse, urlunparse
|
||||
|
||||
import ipaddress
|
||||
import socket
|
||||
@@ -35,6 +35,7 @@ from common.data_source.interfaces import (
|
||||
)
|
||||
from common.data_source.models import Document
|
||||
from common.data_source.utils import rl_requests, retry_builder
|
||||
from common.ssrf_guard import assert_url_is_safe, pin_dns
|
||||
|
||||
try:
|
||||
from jsonpath import jsonpath as _jsonpath # type: ignore[import]
|
||||
@@ -43,6 +44,8 @@ except Exception: # pragma: no cover
|
||||
|
||||
_FIELD_SEGMENT_RE = re.compile(r'^(?P<key>[^\[\]]+)(\[(?P<index>\d+|\*)\])?$')
|
||||
_DEFAULT_MAX_PAGES = 1000
|
||||
_REDIRECT_STATUSES = frozenset({301, 302, 303, 307, 308})
|
||||
_MAX_REDIRECTS = 5
|
||||
|
||||
|
||||
class AuthType:
|
||||
@@ -604,11 +607,19 @@ class RestAPIConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
|
||||
if self.method == "GET":
|
||||
resp = rl_requests.get(url, headers=headers, params=query_params, auth=self._basic_auth, timeout=60)
|
||||
resp = self._safe_request(
|
||||
"GET",
|
||||
url,
|
||||
headers=headers,
|
||||
params=query_params,
|
||||
)
|
||||
elif self.method == "POST":
|
||||
resp = rl_requests.post(
|
||||
url, headers=headers, params=query_params,
|
||||
json=self._static_request_body or {}, auth=self._basic_auth, timeout=60,
|
||||
resp = self._safe_request(
|
||||
"POST",
|
||||
url,
|
||||
headers=headers,
|
||||
params=query_params,
|
||||
json_body=self._static_request_body or {},
|
||||
)
|
||||
else:
|
||||
raise ConnectorValidationError(f"Unsupported HTTP method: {self.method}")
|
||||
@@ -647,6 +658,113 @@ class RestAPIConnector(LoadConnector, PollConnector):
|
||||
except ValueError as exc:
|
||||
raise ConnectorValidationError("REST API response is not valid JSON") from exc
|
||||
|
||||
# Lower-cased names of request headers that carry credentials.
# ``_safe_request`` lower-cases each outgoing header name and drops any that
# appear here once a redirect leaves the original host, so secrets are not
# forwarded to a third party.
_AUTH_SENSITIVE_HEADER_KEYS = frozenset(
    (
        "authorization",
        "proxy-authorization",
        "apikey",
        "api-key",
        "x-api-key",
        "x-auth-token",
    )
)
|
||||
|
||||
def _safe_request(
    self,
    method: str,
    url: str,
    *,
    headers: Dict[str, str],
    params: Dict[str, Any],
    body: Any = None,
    json_body: Any = None,
) -> requests.Response:
    """Issue an HTTP request with per-hop SSRF validation and DNS pinning.

    Redirects are never followed implicitly (``allow_redirects=False``).
    Each hop, including the first, is re-validated with
    ``assert_url_is_safe`` and sent inside ``pin_dns`` for the host/IP pair
    that validation returned.

    Args:
        method: HTTP verb; upper-cased before dispatch.
        url: Initial request URL.
        headers: Request headers. The caller's dict is never mutated; a
            filtered copy is used once credentials must be dropped.
        params: Query parameters for the *initial* request only.
        body: Raw payload, forwarded as ``data=`` for verbs other than
            GET/POST.
        json_body: JSON payload, forwarded as ``json=`` for POST and for
            other non-GET verbs.

    Returns:
        The first response that is not a redirect, or a redirect response
        that carries no ``Location`` header.

    Raises:
        ConnectorValidationError: If any hop's URL fails SSRF validation,
            or more than ``_MAX_REDIRECTS`` redirects are encountered.
    """
    current_url = url
    current_method = method.upper()
    current_body = body
    current_json = json_body
    current_params = dict(params)
    # Local auth handle: cleared once credentials must no longer be sent,
    # even though ``self._basic_auth`` still holds the original value.
    current_auth = self._basic_auth
    previous = urlparse(current_url)

    for _ in range(_MAX_REDIRECTS + 1):
        # Normalize SSRF validation failures to ConnectorValidationError so
        # callers never see a bare ValueError from the guard.
        try:
            hostname, pin_ip = assert_url_is_safe(current_url)
        except ValueError as exc:
            raise ConnectorValidationError(
                f"Unsafe REST API URL: {exc}"
            ) from exc

        with pin_dns(hostname, pin_ip):
            if current_method == "GET":
                resp = rl_requests.get(
                    current_url,
                    headers=headers,
                    params=current_params,
                    auth=current_auth,
                    timeout=60,
                    allow_redirects=False,
                )
            elif current_method == "POST":
                resp = rl_requests.post(
                    current_url,
                    headers=headers,
                    params=current_params,
                    json=current_json,
                    auth=current_auth,
                    timeout=60,
                    allow_redirects=False,
                )
            else:
                resp = rl_requests.request(
                    current_method,
                    current_url,
                    headers=headers,
                    params=current_params,
                    auth=current_auth,
                    timeout=60,
                    data=current_body,
                    json=current_json,
                    allow_redirects=False,
                )

        if resp.status_code not in _REDIRECT_STATUSES:
            return resp

        location = resp.headers.get("Location")
        if not location:
            return resp

        current_url = urljoin(current_url, location)
        target = urlparse(current_url)

        # Drop credentials when the redirect leaves the original host, and
        # also when it downgrades https -> http on the same host: the raw
        # netloc is identical in that case, yet the secrets would travel in
        # cleartext. An http -> https upgrade on the same host keeps them.
        host_changed = bool(target.netloc) and target.netloc != previous.netloc
        tls_downgrade = previous.scheme == "https" and target.scheme != "https"
        if host_changed or tls_downgrade:
            headers = {
                k: v
                for k, v in headers.items()
                if k.lower() not in self._AUTH_SENSITIVE_HEADER_KEYS
            }
            current_auth = None
        previous = target

        # The Location URL is authoritative for every redirect status.
        # Re-sending the original params on 307/308 would append them to a
        # target that already carries its own query string.
        current_params = {}

        if resp.status_code in (301, 302, 303):
            # These statuses downgrade the follow-up to a body-less GET.
            current_method = "GET"
            current_body = None
            current_json = None

    raise ConnectorValidationError(f"Exceeded {_MAX_REDIRECTS} redirects fetching {url!r}")
|
||||
|
||||
def _build_url_with_templates(self, params: Dict[str, Any]) -> tuple[str, Dict[str, Any]]:
|
||||
"""Substitute ``{key}`` placeholders in the URL; return remaining query params."""
|
||||
url = self.url
|
||||
|
||||
@@ -224,11 +224,13 @@ def wrap_request_to_handle_ratelimiting(request_fn: R, default_wait_time_sec: in
|
||||
|
||||
# Wrap each ``requests`` entry point with the rate-limit handling decorator.
# The generic ``requests.request`` is wrapped as well, so verbs other than
# GET/POST go through the same wrapper.
_rate_limited_get, _rate_limited_post, _rate_limited_request = map(
    wrap_request_to_handle_ratelimiting,
    (requests.get, requests.post, requests.request),
)


class _RateLimitedRequest:
    """Namespace exposing the rate-limit-wrapped ``requests`` callables."""

    get = _rate_limited_get
    post = _rate_limited_post
    request = _rate_limited_request


# Public alias: the class itself (not an instance) is used as the namespace,
# so ``rl_requests.get(...)`` resolves to the plain wrapped function.
rl_requests = _RateLimitedRequest
|
||||
|
||||
Reference in New Issue
Block a user