mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-02 16:55:42 +08:00
Mirrors 14 merged upstream PRs into the Go agent port. PRs ported: - #15609 ExeSQL SSRF guard + DNS pin - #15436 HTTP timeout on external API tools - #16363 be_output restore + DeepL error path - #15644 switch no longer matches empty condition - #15374 session_id bind to path agent_id (DAO idor guard) - #16169 sandbox artifact ownership gate - #15457 tenant ownership on agentbots - #15145 rerun agent document access check - #15446 thinking switch (component portion; provider policy lives in internal/llm) - #15426 Invoke URL/proxy SSRF + DNS pin + no-redirects - #15238 agentbot thinking-logs beta endpoint - #14589 UserFillUp SSE event propagation - #14890 anonymous webhook opt-in - #15068 PipelineChunker new component (text/file_ref/parser_id dispatch; file-format extraction is a follow-up) 40 files, +2355 / -58 lines. 33 new tests, all targeted package suites pass (1721 + 4 skipped); 1 pre-existing flaky test unrelated.
186 lines
5.4 KiB
Python
186 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
|
|
# Substrings that check_merge_conflicts looks for; a file is flagged only when
# every one of them occurs somewhere in its text.
MERGE_PATTERNS = ("<<<<<<< ", "=======\n", ">>>>>>> ")
|
|
|
|
|
|
def _read_bytes(path: Path) -> bytes:
    """Return the full contents of *path* as raw, undecoded bytes."""
    with path.open("rb") as handle:
        return handle.read()
|
|
|
|
|
|
def _staged_paths() -> list[Path]:
    """Return the paths staged in the git index for the pending commit.

    Only added, copied, modified and renamed entries are listed
    (``--diff-filter=ACMR``), so deleted files are never returned.

    The listing is requested with ``-z`` so git emits each path verbatim,
    terminated by NUL. Without it git C-quotes "unusual" names (non-ASCII
    bytes under the default ``core.quotePath``, embedded quotes, control
    characters), and the quoted form does not name a file on disk.

    Raises:
        subprocess.CalledProcessError: if ``git diff`` exits non-zero.
    """
    proc = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR", "-z"],
        check=True,
        capture_output=True,
        text=True,
    )
    # The output ends with a NUL, so the final split element is empty.
    return [Path(name) for name in proc.stdout.split("\0") if name]
|
|
|
|
|
|
def _report(errors: list[str]) -> int:
    """Write every message in *errors* to stderr and return an exit status.

    Returns:
        0 when *errors* is empty, otherwise 1.
    """
    for message in errors:
        print(message, file=sys.stderr)
    return 1 if errors else 0
|
|
|
|
|
|
def check_json(paths: list[Path], fix: bool = False) -> int:
    """Check that every ``.json`` file among *paths* parses as JSON.

    Entries with another suffix, or that are not regular files, are skipped.
    ``fix`` is accepted so all checks share one signature; it is not used here.

    Returns:
        0 when every candidate parsed, 1 after printing one line per failure.
    """
    problems: list[str] = []
    candidates = (p for p in paths if p.suffix == ".json" and p.is_file())
    for candidate in candidates:
        try:
            json.loads(candidate.read_text(encoding="utf-8"))
        except Exception as exc:
            # Read, decode and parse failures are all reported the same way.
            problems.append(f"invalid json: {candidate}: {exc}")
    return _report(problems)
|
|
|
|
|
|
def check_yaml(paths: list[Path], fix: bool = False) -> int:
    """Check that every ``.yaml``/``.yml`` file among *paths* loads as YAML.

    Entries with another suffix, or that are not regular files, are skipped.
    ``fix`` is accepted so all checks share one signature; it is not used here.

    Returns:
        0 when every candidate loaded, 1 after printing one line per failure.
    """
    problems: list[str] = []
    yaml_suffixes = {".yaml", ".yml"}
    candidates = (p for p in paths if p.suffix in yaml_suffixes and p.is_file())
    for candidate in candidates:
        try:
            yaml.safe_load(candidate.read_text(encoding="utf-8"))
        except Exception as exc:
            # Read, decode and parse failures are all reported the same way.
            problems.append(f"invalid yaml: {candidate}: {exc}")
    return _report(problems)
|
|
|
|
|
|
def check_eof(paths: list[Path], fix: bool = False) -> int:
    """Check that every non-empty file among *paths* ends with a newline byte.

    With ``fix`` set, a single ``\\n`` is appended to each offending file and a
    note is printed to stderr; the function then always returns 0. Without it,
    offending files are reported and the return value is 1 if any were found.

    NOTE(review): files are examined as raw bytes with no text/binary
    distinction, so a binary file lacking a final LF is treated like any
    other -- confirm that is intended.
    """
    failures: list[str] = []
    for target in paths:
        if not target.is_file():
            continue
        content = _read_bytes(target)
        # Empty files are fine, as is anything already terminated by LF.
        if not content or content.endswith(b"\n"):
            continue
        if not fix:
            failures.append(f"missing trailing newline: {target}")
            continue
        with target.open("ab") as handle:
            handle.write(b"\n")
        print(f"fixed missing-trailing-newline: {target}", file=sys.stderr)
    if fix:
        return 0
    return _report(failures)
|
|
|
|
|
|
# Runs of spaces/tabs sitting directly before a line break (CR or LF) or at the
# very end of the text.
_TRAILING_WS_RE = re.compile(r"[ \t]+(?=\r|\n|$)")
# Any of the three line-break conventions; used to number lines in reports.
_LINE_BREAK_RE = re.compile(r"\r\n|\r|\n")


def check_trailing_whitespace(paths: list[Path], fix: bool = False) -> int:
    """Detect, or with ``fix`` remove, trailing spaces and tabs on each line.

    Files are read and written with newline translation disabled and with the
    ``surrogateescape`` error handler, so a fix changes nothing except the
    whitespace itself: CRLF/CR line endings and bytes that are not valid UTF-8
    are written back exactly as they were read.

    Args:
        paths: Candidate paths; anything that is not a regular file is skipped,
            as is any file that cannot be read.
        fix: When true, rewrite offending files in place and print a note to
            stderr for each one.

    Returns:
        0 when ``fix`` is set or nothing was found; otherwise 1 after printing
        one ``path:line`` entry per offending line.
    """
    errors: list[str] = []
    for path in paths:
        if not path.is_file():
            continue
        try:
            # newline="" keeps "\r\n" and "\r" intact instead of folding them to "\n".
            with path.open(
                "r", encoding="utf-8", errors="surrogateescape", newline=""
            ) as f:
                text = f.read()
        except (OSError, ValueError):
            # Best effort: unreadable files are skipped rather than failing the hook.
            continue
        new_text = _TRAILING_WS_RE.sub("", text)
        if new_text == text:
            continue
        if fix:
            with path.open(
                "w", encoding="utf-8", errors="surrogateescape", newline=""
            ) as f:
                f.write(new_text)
            print(f"fixed trailing-whitespace: {path}", file=sys.stderr)
            continue
        # Number lines from the original text so a whitespace-only final line
        # (which disappears entirely from new_text) is still reported.
        for lineno, line in enumerate(_LINE_BREAK_RE.split(text), 1):
            if line.endswith((" ", "\t")):
                errors.append(f"trailing whitespace: {path}:{lineno}")
    return 0 if fix else _report(errors)
|
|
|
|
|
|
def check_mixed_line_endings(paths: list[Path], fix: bool = False) -> int:
    """Find files that contain both CRLF and bare-LF line endings.

    With ``fix`` set, every CRLF in an offending file is rewritten as LF, a note
    is printed to stderr, and the function always returns 0. Without it,
    offending files are reported and the return value is 1 if any were found.
    """
    failures: list[str] = []
    for target in paths:
        if not target.is_file():
            continue
        raw = _read_bytes(target)
        if b"\r\n" not in raw:
            continue
        # Any "\n" left after deleting every CRLF pair must be a bare LF.
        if b"\n" not in raw.replace(b"\r\n", b""):
            continue
        if fix:
            target.write_bytes(raw.replace(b"\r\n", b"\n"))
            print(f"fixed mixed-line-ending: {target}", file=sys.stderr)
        else:
            failures.append(f"mixed line endings: {target}")
    if fix:
        return 0
    return _report(failures)
|
|
|
|
|
|
def check_merge_conflicts(paths: list[Path], fix: bool = False) -> int:
    """Flag files whose text contains every marker in ``MERGE_PATTERNS``.

    Files are decoded as UTF-8 with undecodable bytes ignored. The markers are
    matched as plain substrings, wherever they occur in the text. ``fix`` is
    accepted so all checks share one signature; it is not used here.

    Returns:
        0 when nothing was flagged, 1 after printing one line per flagged file.
    """
    flagged: list[str] = []
    for target in paths:
        if not target.is_file():
            continue
        content = target.read_text(encoding="utf-8", errors="ignore")
        if any(marker not in content for marker in MERGE_PATTERNS):
            continue
        flagged.append(f"merge conflict markers: {target}")
    return _report(flagged)
|
|
|
|
|
|
def check_symlinks(paths: list[Path], fix: bool = False) -> int:
    """Report symlinks among *paths* whose target does not exist.

    ``fix`` is accepted so all checks share one signature; it is not used here.

    Returns:
        0 when no broken link was found, 1 after printing one line per link.
    """
    dangling = [
        f"broken symlink: {link}"
        for link in paths
        if link.is_symlink() and not link.exists()
    ]
    return _report(dangling)
|
|
|
|
|
|
def check_case_conflicts(_: list[Path], fix: bool = False) -> int:
    """Report tracked paths that differ from one another only by letter case.

    The path list argument is ignored: the whole index is inspected via
    ``git ls-files``. ``fix`` is accepted so all checks share one signature; it
    is not used here.

    The listing is requested with ``-z`` so names arrive verbatim and
    NUL-terminated. Git's default quoting would turn non-ASCII names into octal
    escapes, which ``str.lower`` cannot fold and which make the reported paths
    unreadable.

    Returns:
        0 when no conflict was found, 1 after printing one line per conflict.

    Raises:
        subprocess.CalledProcessError: if ``git ls-files`` exits non-zero.
    """
    proc = subprocess.run(
        ["git", "ls-files", "-z"],
        check=True,
        capture_output=True,
        text=True,
    )
    seen: dict[str, str] = {}
    errors: list[str] = []
    for path in proc.stdout.split("\0"):
        if not path:
            # The output ends with a NUL, so the final split element is empty.
            continue
        lowered = path.lower()
        other = seen.get(lowered)
        # An identical repeat of the same path is not treated as a conflict.
        if other is not None and other != path:
            errors.append(f"case conflict: {other} <-> {path}")
        seen[lowered] = path
    return _report(errors)
|
|
|
|
|
|
# Maps the check name given on the command line to the function implementing
# it. Each function is called as func(paths, fix=...) and returns an exit status.
CHECKS = {
    "json": check_json,
    "yaml": check_yaml,
    "eof": check_eof,
    "trailing-whitespace": check_trailing_whitespace,
    "mixed-line-ending": check_mixed_line_endings,
    "merge-conflict": check_merge_conflicts,
    "symlinks": check_symlinks,
    "case-conflict": check_case_conflicts,
}
|
|
|
|
|
|
def main() -> int:
    """Run the check named on the command line against the staged files.

    Expected invocation: ``<script> <check-name> [--fix]``, where the check
    name is a key of ``CHECKS``.

    Returns:
        2 after printing a usage line when the arguments are malformed;
        otherwise the status returned by the selected check.
    """
    args = sys.argv[1:]
    well_formed = (
        1 <= len(args) <= 2
        and args[0] in CHECKS
        and (len(args) == 1 or args[1] == "--fix")
    )
    if not well_formed:
        # Join the dict itself (insertion-ordered) so the usage line lists the
        # checks in a stable order; a set would shuffle them between runs.
        print(f"usage: {sys.argv[0]} <{'|'.join(CHECKS)}> [--fix]", file=sys.stderr)
        return 2
    fix = len(args) == 2
    paths = _staged_paths()
    return CHECKS[args[0]](paths, fix=fix)
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|