mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Fix: validate URL scheme and resolved IP before crawling to prevent SSRF (#14090)
### What problem does this PR solve?

The `POST /upload_info?url=<url>` endpoint accepted a user-supplied URL and passed it directly to `AsyncWebCrawler` without any validation. There were no restrictions on URL scheme, destination hostname, or resolved IP address. This allowed any authenticated user to instruct the server to make outbound HTTP requests to internal infrastructure — including RFC 1918 private networks, loopback addresses, and cloud metadata services such as `http://169.254.169.254` — effectively using the server as a proxy for internal network reconnaissance or credential theft.

This PR adds an SSRF guard (`_validate_url_for_crawl`) that runs before any crawl is initiated. It enforces an allowlist of safe schemes (http/https), resolves the hostname at validation time, and rejects any URL whose resolved IP falls within a private or reserved network range.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -179,10 +179,7 @@ class Invoke(ComponentBase, ABC):
|
||||
if not isinstance(headers, dict):
|
||||
raise ValueError("Invoke headers must be a JSON object.")
|
||||
|
||||
return {
|
||||
key: self._resolve_header_text(value, kwargs) if isinstance(value, str) else value
|
||||
for key, value in headers.items()
|
||||
}
|
||||
return {key: self._resolve_header_text(value, kwargs) if isinstance(value, str) else value for key, value in headers.items()}
|
||||
|
||||
def _build_proxies(self) -> dict | None:
|
||||
if not re.sub(r"https?:?/?/?", "", self._param.proxy):
|
||||
@@ -215,7 +212,7 @@ class Invoke(ComponentBase, ABC):
|
||||
# HtmlParser keeps the Invoke output text-focused when the endpoint returns HTML.
|
||||
sections = HtmlParser()(None, response.content)
|
||||
return "\n".join(sections)
|
||||
|
||||
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
|
||||
def _invoke(self, **kwargs):
|
||||
if self.check_if_canceled("Invoke processing"):
|
||||
|
||||
Reference in New Issue
Block a user