mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Harden closed-advisory fixes (#16409)
## Summary

- harden reopened advisory fixes across REST connector, invoke, document downloads, and markdown rendering
- add targeted regression coverage for redirect-safe SSRF handling, invoke SSRF checks, document access control, and markdown sanitization
- verify each referenced GHSA against the original GitHub advisory text and align the closed-advisory plan with the implemented remediation

## What changed

- add tenant access checks to document download endpoints to avoid cross-tenant document disclosure
- add per-hop SSRF validation, DNS pinning, redirect handling, and redirect limits to the REST API connector
- ensure invoke requests validate and pin the resolved host and never follow redirects implicitly
- keep the generic rate-limited request path wrapped, not just GET and POST helpers
- sanitize markdown HTML before rendering in the highlight markdown component

## Validation

- `cd web && npm test -- --runInBand src/components/highlight-markdown/__tests__/index.test.tsx`
- `.venv/bin/python -m pytest -q test/unit_test/data_source/test_rest_api_connector.py`
- targeted `test/testcases/test_web_api/...` unit additions were reviewed, but the suite cannot be executed end-to-end in this environment because parent `test/testcases/conftest.py` requires a local service on `127.0.0.1:9380`

## Notes

- all GHSA entries referenced by the plan were checked against the original GitHub advisory text, not sampled
- the closed-advisory plan document was updated locally during review, but is intentionally not included in this PR
This commit is contained in:
@@ -25,6 +25,7 @@ import requests
|
||||
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
from common.connection_utils import timeout
|
||||
from common.ssrf_guard import assert_url_is_safe, pin_dns
|
||||
from deepdoc.parser import HtmlParser
|
||||
|
||||
|
||||
@@ -56,6 +57,11 @@ class Invoke(ComponentBase, ABC):
|
||||
component_name = "Invoke"
|
||||
header_variable_ref_patt = r"\{([a-zA-Z_][a-zA-Z0-9_.@-]*)\}"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._pinned_hostname: str | None = None
|
||||
self._pinned_ip: str | None = None
|
||||
|
||||
@staticmethod
|
||||
def _coerce_json_arg_if_possible(key, value):
|
||||
raw_value = value
|
||||
@@ -169,6 +175,9 @@ class Invoke(ComponentBase, ABC):
|
||||
url = self._resolve_template_text(self._param.url.strip(), kwargs)
|
||||
if not url.startswith(("http://", "https://")):
|
||||
url = "http://" + url
|
||||
hostname, ip = assert_url_is_safe(url)
|
||||
self._pinned_hostname = hostname
|
||||
self._pinned_ip = ip
|
||||
return url
|
||||
|
||||
def _build_headers(self, kwargs: dict) -> dict:
|
||||
@@ -194,6 +203,7 @@ class Invoke(ComponentBase, ABC):
|
||||
"headers": headers,
|
||||
"proxies": proxies,
|
||||
"timeout": self._param.timeout,
|
||||
"allow_redirects": False,
|
||||
}
|
||||
|
||||
# GET sends query params; POST/PUT send either JSON or form data based on datatype.
|
||||
@@ -219,7 +229,6 @@ class Invoke(ComponentBase, ABC):
|
||||
return
|
||||
|
||||
args = self._build_request_args(kwargs)
|
||||
url = self._build_url(kwargs)
|
||||
headers = self._build_headers(kwargs)
|
||||
proxies = self._build_proxies()
|
||||
|
||||
@@ -229,7 +238,15 @@ class Invoke(ComponentBase, ABC):
|
||||
return
|
||||
|
||||
try:
|
||||
response = self._send_request(url, args, headers, proxies)
|
||||
# Coderabbit MAJOR #3486038788: URL validation is now inside the
|
||||
# retry/except block so SSRF rejections (ValueError from
|
||||
# assert_url_is_safe) populate _ERROR via the standard error
|
||||
# path instead of escaping _invoke().
|
||||
url = self._build_url(kwargs)
|
||||
if not self._pinned_hostname or not self._pinned_ip:
|
||||
raise ValueError("Invoke URL was not validated before request.")
|
||||
with pin_dns(self._pinned_hostname, self._pinned_ip):
|
||||
response = self._send_request(url, args, headers, proxies)
|
||||
result = self._format_response(response)
|
||||
self.set_output("result", result)
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user