2025-12-01 14:24:06 +08:00
|
|
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
|
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
import asyncio
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import time
|
|
|
|
|
from typing import Any, Dict, Optional
|
Potential fix for code scanning alert no. 57: Clear-text logging of sensitive information (#12071)
Potential fix for
[https://github.com/infiniflow/ragflow/security/code-scanning/57](https://github.com/infiniflow/ragflow/security/code-scanning/57)
In general, the safest fix is to ensure that any logging of request URLs
from `async_request` (and similar helpers) cannot include secrets. This
can be done by (a) suppressing logging entirely for URLs considered
sensitive, or (b) logging only a non-sensitive subset (e.g., scheme +
host + path) and never query strings or credentials.
The minimal, backward-compatible change here is to strengthen
`_redact_sensitive_url_params` and `_is_sensitive_url` / the logging
call so that we never log query parameters at all. Instead of logging
the full URL (with redacted query), we can log only
`scheme://netloc/path` and optionally strip userinfo. This retains
useful observability (which endpoint, which method, response code,
timing) while guaranteeing that no secrets in query strings or path
segments appear in logs. Concretely:
- Update `_redact_sensitive_url_params` to *not* include the query
string in the returned value, and to drop any embedded userinfo
(`username:password@host`).
- Continue to wrap logging in a “sensitive URL” guard, but now the
redaction routine itself ensures no secrets from query are present.
- Leave callers (e.g., `github_callback`, `feishu_callback`) unchanged,
since they only pass URLs and do not control the logging behavior
directly.
All changes are confined to `common/http_client.py` inside the provided
snippet. No new imports are necessary.
_Suggested fixes powered by Copilot Autofix. Review carefully before
merging._
---------
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-12-22 13:31:03 +08:00
|
|
|
from urllib.parse import urlparse, urlunparse
|
2025-12-01 14:24:06 +08:00
|
|
|
|
2025-12-11 13:54:47 +08:00
|
|
|
from common import settings
|
2025-12-01 14:24:06 +08:00
|
|
|
import httpx
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# Module-wide defaults. Every value can be overridden through the environment
# variable named in the call; the fallbacks are kept conservative so that
# adopting this client does not change behaviour unexpectedly.
DEFAULT_TIMEOUT = float(os.getenv("HTTP_CLIENT_TIMEOUT", "15"))
# Align with the `requests` defaults: follow redirects, with a cap of 30,
# unless overridden. The flag is read as an integer ("0" / "1").
DEFAULT_FOLLOW_REDIRECTS = bool(int(os.getenv("HTTP_CLIENT_FOLLOW_REDIRECTS", "1")))
DEFAULT_MAX_REDIRECTS = int(os.getenv("HTTP_CLIENT_MAX_REDIRECTS", "30"))
DEFAULT_MAX_RETRIES = int(os.getenv("HTTP_CLIENT_MAX_RETRIES", "2"))
DEFAULT_BACKOFF_FACTOR = float(os.getenv("HTTP_CLIENT_BACKOFF_FACTOR", "0.5"))
# No fallback: stays None when HTTP_CLIENT_PROXY is not set.
DEFAULT_PROXY = os.getenv("HTTP_CLIENT_PROXY")
DEFAULT_USER_AGENT = os.getenv("HTTP_CLIENT_USER_AGENT", "ragflow-http-client")
|
|
|
|
|
|
|
|
|
|
|
2025-12-09 02:35:03 +01:00
|
|
|
def _clean_headers(
    headers: Optional[Dict[str, str]], auth_token: Optional[str] = None
) -> Optional[Dict[str, str]]:
    """Assemble the header mapping sent with a request.

    The result starts with the default ``User-Agent`` (when one is
    configured) and an ``Authorization`` header (when ``auth_token`` is
    truthy). Caller-supplied ``headers`` are applied last, so they override
    those two entries on a key clash.

    Args:
        headers: Extra headers from the caller, or ``None``. Entries whose
            value is ``None`` are skipped; keys and values are converted
            with ``str``.
        auth_token: Value placed verbatim in the ``Authorization`` header.

    Returns:
        The merged mapping, or ``None`` when it would be empty.
    """
    combined: Dict[str, str] = {}

    if DEFAULT_USER_AGENT:
        combined["User-Agent"] = DEFAULT_USER_AGENT
    if auth_token:
        combined["Authorization"] = auth_token

    if headers is not None:
        for name, value in headers.items():
            if value is not None:
                combined[str(name)] = str(value)

    return combined or None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_delay(backoff_factor: float, attempt: int) -> float:
|
|
|
|
|
return backoff_factor * (2**attempt)
|
|
|
|
|
|
|
|
|
|
|
2025-12-10 19:08:45 +08:00
|
|
|
# List of sensitive parameters to redact from URLs before logging
|
|
|
|
|
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
|
|
|
|
|
|
|
|
|
|
def _redact_sensitive_url_params(url: str) -> str:
|
Potential fix for code scanning alert no. 57: Clear-text logging of sensitive information (#12071)
Potential fix for
[https://github.com/infiniflow/ragflow/security/code-scanning/57](https://github.com/infiniflow/ragflow/security/code-scanning/57)
In general, the safest fix is to ensure that any logging of request URLs
from `async_request` (and similar helpers) cannot include secrets. This
can be done by (a) suppressing logging entirely for URLs considered
sensitive, or (b) logging only a non-sensitive subset (e.g., scheme +
host + path) and never query strings or credentials.
The minimal, backward-compatible change here is to strengthen
`_redact_sensitive_url_params` and `_is_sensitive_url` / the logging
call so that we never log query parameters at all. Instead of logging
the full URL (with redacted query), we can log only
`scheme://netloc/path` and optionally strip userinfo. This retains
useful observability (which endpoint, which method, response code,
timing) while guaranteeing that no secrets in query strings or path
segments appear in logs. Concretely:
- Update `_redact_sensitive_url_params` to *not* include the query
string in the returned value, and to drop any embedded userinfo
(`username:password@host`).
- Continue to wrap logging in a “sensitive URL” guard, but now the
redaction routine itself ensures no secrets from query are present.
- Leave callers (e.g., `github_callback`, `feishu_callback`) unchanged,
since they only pass URLs and do not control the logging behavior
directly.
All changes are confined to `common/http_client.py` inside the provided
snippet. No new imports are necessary.
_Suggested fixes powered by Copilot Autofix. Review carefully before
merging._
---------
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-12-22 13:31:03 +08:00
|
|
|
"""
|
|
|
|
|
Return a version of the URL that is safe to log.
|
|
|
|
|
|
|
|
|
|
We intentionally drop query parameters and userinfo to avoid leaking
|
|
|
|
|
credentials or tokens via logs. Only scheme, host, port and path
|
|
|
|
|
are preserved.
|
|
|
|
|
"""
|
2025-12-10 19:08:45 +08:00
|
|
|
try:
|
|
|
|
|
parsed = urlparse(url)
|
Potential fix for code scanning alert no. 57: Clear-text logging of sensitive information (#12071)
Potential fix for
[https://github.com/infiniflow/ragflow/security/code-scanning/57](https://github.com/infiniflow/ragflow/security/code-scanning/57)
In general, the safest fix is to ensure that any logging of request URLs
from `async_request` (and similar helpers) cannot include secrets. This
can be done by (a) suppressing logging entirely for URLs considered
sensitive, or (b) logging only a non-sensitive subset (e.g., scheme +
host + path) and never query strings or credentials.
The minimal, backward-compatible change here is to strengthen
`_redact_sensitive_url_params` and `_is_sensitive_url` / the logging
call so that we never log query parameters at all. Instead of logging
the full URL (with redacted query), we can log only
`scheme://netloc/path` and optionally strip userinfo. This retains
useful observability (which endpoint, which method, response code,
timing) while guaranteeing that no secrets in query strings or path
segments appear in logs. Concretely:
- Update `_redact_sensitive_url_params` to *not* include the query
string in the returned value, and to drop any embedded userinfo
(`username:password@host`).
- Continue to wrap logging in a “sensitive URL” guard, but now the
redaction routine itself ensures no secrets from query are present.
- Leave callers (e.g., `github_callback`, `feishu_callback`) unchanged,
since they only pass URLs and do not control the logging behavior
directly.
All changes are confined to `common/http_client.py` inside the provided
snippet. No new imports are necessary.
_Suggested fixes powered by Copilot Autofix. Review carefully before
merging._
---------
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-12-22 13:31:03 +08:00
|
|
|
# Remove any potential userinfo (username:password@)
|
|
|
|
|
netloc = parsed.hostname or ""
|
|
|
|
|
if parsed.port:
|
|
|
|
|
netloc = f"{netloc}:{parsed.port}"
|
|
|
|
|
# Reconstruct URL without query, params, fragment, or userinfo.
|
|
|
|
|
safe_url = urlunparse(
|
|
|
|
|
(
|
|
|
|
|
parsed.scheme,
|
|
|
|
|
netloc,
|
|
|
|
|
parsed.path,
|
|
|
|
|
"", # params
|
|
|
|
|
"", # query
|
|
|
|
|
"", # fragment
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
return safe_url
|
2025-12-10 19:08:45 +08:00
|
|
|
except Exception:
|
Potential fix for code scanning alert no. 57: Clear-text logging of sensitive information (#12071)
Potential fix for
[https://github.com/infiniflow/ragflow/security/code-scanning/57](https://github.com/infiniflow/ragflow/security/code-scanning/57)
In general, the safest fix is to ensure that any logging of request URLs
from `async_request` (and similar helpers) cannot include secrets. This
can be done by (a) suppressing logging entirely for URLs considered
sensitive, or (b) logging only a non-sensitive subset (e.g., scheme +
host + path) and never query strings or credentials.
The minimal, backward-compatible change here is to strengthen
`_redact_sensitive_url_params` and `_is_sensitive_url` / the logging
call so that we never log query parameters at all. Instead of logging
the full URL (with redacted query), we can log only
`scheme://netloc/path` and optionally strip userinfo. This retains
useful observability (which endpoint, which method, response code,
timing) while guaranteeing that no secrets in query strings or path
segments appear in logs. Concretely:
- Update `_redact_sensitive_url_params` to *not* include the query
string in the returned value, and to drop any embedded userinfo
(`username:password@host`).
- Continue to wrap logging in a “sensitive URL” guard, but now the
redaction routine itself ensures no secrets from query are present.
- Leave callers (e.g., `github_callback`, `feishu_callback`) unchanged,
since they only pass URLs and do not control the logging behavior
directly.
All changes are confined to `common/http_client.py` inside the provided
snippet. No new imports are necessary.
_Suggested fixes powered by Copilot Autofix. Review carefully before
merging._
---------
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-12-22 13:31:03 +08:00
|
|
|
# If parsing fails, fall back to omitting the URL entirely.
|
|
|
|
|
return "<redacted-url>"
|
2025-12-10 19:08:45 +08:00
|
|
|
|
2025-12-11 13:54:47 +08:00
|
|
|
def _is_sensitive_url(url: str) -> bool:
    """Return True if URL is one of the configured OAuth endpoints.

    The endpoints are read from ``settings.GITHUB_OAUTH`` (key ``"url"``)
    and ``settings.FEISHU_OAUTH`` (keys ``"app_access_token_url"`` and
    ``"user_access_token_url"``). Any error while reading either setting is
    ignored, so a missing or malformed configuration simply contributes no
    endpoints.

    Args:
        url: The request URL to classify.

    Returns:
        True when scheme, netloc and path of ``url`` all equal those of a
        configured endpoint; query string and fragment are not compared.
    """
    endpoints = set()

    # GitHub OAuth endpoint.
    try:
        github_cfg = settings.GITHUB_OAUTH
        if github_cfg is not None:
            endpoint = github_cfg.get("url")
            if endpoint:
                endpoints.add(endpoint)
    except Exception:
        pass  # Best effort: treat unreadable settings as "nothing configured".

    # Feishu OAuth endpoints.
    try:
        feishu_cfg = settings.FEISHU_OAUTH
        if feishu_cfg is not None:
            for key in ("app_access_token_url", "user_access_token_url"):
                endpoint = feishu_cfg.get(key)
                if endpoint:
                    endpoints.add(endpoint)
    except Exception:
        pass  # Best effort, as above.

    # Defensive normalization: compare only scheme + netloc + path.
    candidate = urlparse(url)
    candidate_key = (candidate.scheme, candidate.netloc, candidate.path)
    return any(
        (known.scheme, known.netloc, known.path) == candidate_key
        for known in map(urlparse, endpoints)
    )
|
2025-12-10 19:08:45 +08:00
|
|
|
|
2025-12-01 14:24:06 +08:00
|
|
|
async def async_request(
    method: str,
    url: str,
    *,
    request_timeout: float | httpx.Timeout | None = None,
    follow_redirects: bool | None = None,
    max_redirects: Optional[int] = None,
    headers: Optional[Dict[str, str]] = None,
    auth_token: Optional[str] = None,
    retries: Optional[int] = None,
    backoff_factor: Optional[float] = None,
    proxy: Any = None,
    **kwargs: Any,
) -> httpx.Response:
    """Lightweight async HTTP wrapper using httpx.AsyncClient with safe defaults.

    A fresh ``httpx.AsyncClient`` is created for the call. The request is
    retried with exponential backoff only when ``httpx.RequestError`` is
    raised; any response that is received is returned as-is, whatever its
    status code.

    Args:
        method: HTTP method passed to ``client.request``.
        url: Target URL.
        request_timeout: Client timeout; ``DEFAULT_TIMEOUT`` when ``None``.
        follow_redirects: ``DEFAULT_FOLLOW_REDIRECTS`` when ``None``.
        max_redirects: ``DEFAULT_MAX_REDIRECTS`` when ``None``.
        headers: Extra headers, merged by ``_clean_headers``.
        auth_token: Value for the ``Authorization`` header.
        retries: Number of retries after the first attempt;
            ``DEFAULT_MAX_RETRIES`` when ``None``. Negative values are
            treated as 0.
        backoff_factor: Base delay for ``_get_delay``;
            ``DEFAULT_BACKOFF_FACTOR`` when ``None``.
        proxy: Proxy for the client; ``DEFAULT_PROXY`` when ``None``.
        **kwargs: Forwarded unchanged to ``client.request``.

    Returns:
        The ``httpx.Response`` of the first attempt that completes.

    Raises:
        httpx.RequestError: Re-raised from the last attempt once all
            retries are exhausted.
    """
    timeout = DEFAULT_TIMEOUT if request_timeout is None else request_timeout
    if follow_redirects is None:
        follow_redirects = DEFAULT_FOLLOW_REDIRECTS
    if max_redirects is None:
        max_redirects = DEFAULT_MAX_REDIRECTS
    # Clamp the default too, so a negative HTTP_CLIENT_MAX_RETRIES cannot
    # produce an empty attempt loop.
    retries = max(DEFAULT_MAX_RETRIES if retries is None else retries, 0)
    if backoff_factor is None:
        backoff_factor = DEFAULT_BACKOFF_FACTOR
    headers = _clean_headers(headers, auth_token=auth_token)
    if proxy is None:
        proxy = DEFAULT_PROXY

    async with httpx.AsyncClient(
        timeout=timeout,
        follow_redirects=follow_redirects,
        max_redirects=max_redirects,
        proxy=proxy,
    ) as client:
        last_exc: Exception | None = None
        for attempt in range(retries + 1):
            try:
                start = time.monotonic()
                response = await client.request(
                    method=method, url=url, headers=headers, **kwargs
                )
                duration = time.monotonic() - start
                # Only do the URL classification/redaction work when the
                # record will actually be emitted. OAuth endpoints are never
                # logged; other URLs are logged without query or userinfo.
                if logger.isEnabledFor(logging.DEBUG) and not _is_sensitive_url(url):
                    logger.debug(
                        "async_request %s %s -> %s in %.3fs",
                        method,
                        _redact_sensitive_url_params(url),
                        response.status_code,
                        duration,
                    )
                return response
            except httpx.RequestError as exc:
                last_exc = exc
                # Warnings deliberately carry only the method: the URL may
                # embed secrets, so it is kept out of retry/failure logs.
                if attempt >= retries:
                    logger.warning(f"async_request exhausted retries for {method}")
                    raise
                delay = _get_delay(backoff_factor, attempt)
                logger.warning(
                    f"async_request attempt {attempt + 1}/{retries + 1} failed for {method}; retrying in {delay:.2f}s"
                )
                await asyncio.sleep(delay)
        raise last_exc  # pragma: no cover
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sync_request(
    method: str,
    url: str,
    *,
    timeout: float | httpx.Timeout | None = None,
    follow_redirects: bool | None = None,
    max_redirects: Optional[int] = None,
    headers: Optional[Dict[str, str]] = None,
    auth_token: Optional[str] = None,
    retries: Optional[int] = None,
    backoff_factor: Optional[float] = None,
    proxy: Any = None,
    **kwargs: Any,
) -> httpx.Response:
    """Synchronous counterpart to async_request, for CLI/tests or sync contexts.

    A fresh ``httpx.Client`` is created for the call. The request is retried
    with exponential backoff only when ``httpx.RequestError`` is raised; any
    response that is received is returned as-is, whatever its status code.

    Args:
        method: HTTP method passed to ``client.request``.
        url: Target URL.
        timeout: Client timeout; ``DEFAULT_TIMEOUT`` when ``None``.
        follow_redirects: ``DEFAULT_FOLLOW_REDIRECTS`` when ``None``.
        max_redirects: ``DEFAULT_MAX_REDIRECTS`` when ``None``.
        headers: Extra headers, merged by ``_clean_headers``.
        auth_token: Value for the ``Authorization`` header.
        retries: Number of retries after the first attempt;
            ``DEFAULT_MAX_RETRIES`` when ``None``. Negative values are
            treated as 0.
        backoff_factor: Base delay for ``_get_delay``;
            ``DEFAULT_BACKOFF_FACTOR`` when ``None``.
        proxy: Proxy for the client; ``DEFAULT_PROXY`` when ``None``.
        **kwargs: Forwarded unchanged to ``client.request``.

    Returns:
        The ``httpx.Response`` of the first attempt that completes.

    Raises:
        httpx.RequestError: Re-raised from the last attempt once all
            retries are exhausted.
    """
    if timeout is None:
        timeout = DEFAULT_TIMEOUT
    if follow_redirects is None:
        follow_redirects = DEFAULT_FOLLOW_REDIRECTS
    if max_redirects is None:
        max_redirects = DEFAULT_MAX_REDIRECTS
    # Clamp the default too, so a negative HTTP_CLIENT_MAX_RETRIES cannot
    # produce an empty attempt loop.
    retries = max(DEFAULT_MAX_RETRIES if retries is None else retries, 0)
    if backoff_factor is None:
        backoff_factor = DEFAULT_BACKOFF_FACTOR
    headers = _clean_headers(headers, auth_token=auth_token)
    if proxy is None:
        proxy = DEFAULT_PROXY

    with httpx.Client(
        timeout=timeout,
        follow_redirects=follow_redirects,
        max_redirects=max_redirects,
        proxy=proxy,
    ) as client:
        last_exc: Exception | None = None
        for attempt in range(retries + 1):
            try:
                start = time.monotonic()
                response = client.request(
                    method=method, url=url, headers=headers, **kwargs
                )
                duration = time.monotonic() - start
                # Same logging policy as async_request: never log OAuth
                # endpoints, and log other URLs only without query/userinfo.
                if logger.isEnabledFor(logging.DEBUG) and not _is_sensitive_url(url):
                    logger.debug(
                        "sync_request %s %s -> %s in %.3fs",
                        method,
                        _redact_sensitive_url_params(url),
                        response.status_code,
                        duration,
                    )
                return response
            except httpx.RequestError as exc:
                last_exc = exc
                # Keep the URL out of warnings, since it may embed secrets.
                # Only the exception class is logged because the exception
                # text is not guaranteed to be free of URL-derived data.
                error_kind = type(exc).__name__
                if attempt >= retries:
                    logger.warning(
                        f"sync_request exhausted retries for {method}: {error_kind}"
                    )
                    raise
                delay = _get_delay(backoff_factor, attempt)
                logger.warning(
                    f"sync_request attempt {attempt + 1}/{retries + 1} failed for {method}: {error_kind}; retrying in {delay:.2f}s"
                )
                time.sleep(delay)
        raise last_exc  # pragma: no cover
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Public API of this module: the two request helpers followed by the
# environment-derived default settings.
__all__ = [
    "async_request",
    "sync_request",
    "DEFAULT_TIMEOUT",
    "DEFAULT_FOLLOW_REDIRECTS",
    "DEFAULT_MAX_REDIRECTS",
    "DEFAULT_MAX_RETRIES",
    "DEFAULT_BACKOFF_FACTOR",
    "DEFAULT_PROXY",
    "DEFAULT_USER_AGENT",
]
|