Initial commit with translated description

This commit is contained in:
2026-03-29 13:06:50 +08:00
commit 673a554c43
6 changed files with 724 additions and 0 deletions

58
scripts/compare.py Normal file
View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python3
"""Compare before/after transformation with side-by-side detection scores."""
import argparse, sys
from pathlib import Path
from detect import detect
from transform import transform
def main():
    """CLI entry point: score a text before and after transformation and print a diff table.

    Reads from the positional input file when given, otherwise from stdin.
    Optionally saves the transformed text with -o/--output.
    """
    parser = argparse.ArgumentParser(description="Compare AI detection before/after transformation")
    parser.add_argument("input", nargs="?", help="Input file (or stdin)")
    parser.add_argument("-a", "--aggressive", action="store_true", help="Use aggressive mode")
    parser.add_argument("-o", "--output", help="Save transformed text to file")
    args = parser.parse_args()
    # Explicit UTF-8 so behavior does not depend on the platform's locale encoding.
    text = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read()
    before = detect(text)
    transformed, changes = transform(text, aggressive=args.aggressive)
    after = detect(transformed)
    icons = {"very high": "🔴", "high": "🟠", "medium": "🟡", "low": "🟢"}
    print(f"\n{'='*60}")
    print("BEFORE → AFTER COMPARISON")
    print(f"{'='*60}\n")
    print(f"{'Metric':<25} {'Before':<15} {'After':<15} {'Change':<10}")
    print(f"{'-'*60}")
    issue_diff = after.total_issues - before.total_issues
    # Show an explicit "+" for regressions; negative deltas already carry a sign.
    issue_sign = "+" if issue_diff > 0 else ""
    print(f"{'Issues':<25} {before.total_issues:<15} {after.total_issues:<15} {issue_sign}{issue_diff}")
    print(f"{'AI Probability':<25} {icons.get(before.ai_probability,'')} {before.ai_probability:<12} {icons.get(after.ai_probability,'')} {after.ai_probability:<12}")
    print(f"{'Word Count':<25} {before.word_count:<15} {after.word_count:<15} {after.word_count - before.word_count:+}")
    if changes:
        print(f"\n{'='*60}")
        print(f"TRANSFORMATIONS ({len(changes)})")
        print(f"{'='*60}")
        for c in changes:
            print(c)
    reduction = before.total_issues - after.total_issues
    if reduction > 0:
        # Guard against division by zero when the input started with no issues.
        pct = (reduction / before.total_issues * 100) if before.total_issues else 0
        print(f"\n✓ Reduced {reduction} issues ({pct:.0f}% improvement)")
    elif reduction < 0:
        print(f"\n⚠ Issues increased by {-reduction}")
    else:
        print("\n— No change in issue count")
    if args.output:
        Path(args.output).write_text(transformed, encoding="utf-8")
        print(f"\n→ Saved to {args.output}")


if __name__ == "__main__":
    main()

160
scripts/detect.py Normal file
View File

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""Detect AI patterns in text based on Wikipedia's Signs of AI Writing."""
import argparse, json, re, sys
from pathlib import Path
from dataclasses import dataclass, field
# Pattern definitions live beside this script; load them once at import time.
SCRIPT_DIR = Path(__file__).parent
PATTERNS = json.loads((SCRIPT_DIR / "patterns.json").read_text())
@dataclass
class DetectionResult:
    """Results of one AI-pattern scan over a text.

    Each list field holds (phrase, count) tuples sorted by descending count
    (see find_matches). The scalar fields summarize the scan as a whole.
    """
    # One hit list per pattern category defined in patterns.json.
    significance_inflation: list = field(default_factory=list)
    notability_emphasis: list = field(default_factory=list)
    superficial_analysis: list = field(default_factory=list)
    promotional_language: list = field(default_factory=list)
    vague_attributions: list = field(default_factory=list)
    challenges_formula: list = field(default_factory=list)
    ai_vocabulary: list = field(default_factory=list)
    copula_avoidance: list = field(default_factory=list)
    filler_phrases: list = field(default_factory=list)
    chatbot_artifacts: list = field(default_factory=list)
    hedging_phrases: list = field(default_factory=list)
    negative_parallelisms: list = field(default_factory=list)
    rule_of_three: list = field(default_factory=list)
    markdown_artifacts: list = field(default_factory=list)
    citation_bugs: list = field(default_factory=list)
    knowledge_cutoff: list = field(default_factory=list)
    # Raw character counts rather than phrase matches.
    curly_quotes: int = 0
    em_dashes: int = 0
    # Weighted sum of all findings (computed in detect); drives ai_probability.
    total_issues: int = 0
    # One of "low", "medium", "high", "very high" (assigned in detect).
    ai_probability: str = "low"
    word_count: int = 0
def find_matches(text: str, patterns: list) -> list:
    """Return (pattern, occurrence-count) pairs for each pattern found in *text*.

    Matching is case-insensitive substring counting; results are ordered
    most-frequent first (stable for equal counts).
    """
    haystack = text.lower()
    counted = [(needle, haystack.count(needle.lower())) for needle in patterns]
    hits = [pair for pair in counted if pair[1] > 0]
    return sorted(hits, key=lambda pair: -pair[1])
def detect(text: str) -> DetectionResult:
    """Scan *text* against every pattern category and return a scored result."""
    r = DetectionResult()
    r.word_count = len(text.split())
    r.significance_inflation = find_matches(text, PATTERNS["significance_inflation"])
    r.notability_emphasis = find_matches(text, PATTERNS["notability_emphasis"])
    r.superficial_analysis = find_matches(text, PATTERNS["superficial_analysis"])
    r.promotional_language = find_matches(text, PATTERNS["promotional_language"])
    r.vague_attributions = find_matches(text, PATTERNS["vague_attributions"])
    r.challenges_formula = find_matches(text, PATTERNS["challenges_formula"])
    r.ai_vocabulary = find_matches(text, PATTERNS["ai_vocabulary"])
    # These two categories are stored as replacement maps; scan their keys.
    r.copula_avoidance = find_matches(text, list(PATTERNS["copula_avoidance"].keys()))
    r.filler_phrases = find_matches(text, list(PATTERNS["filler_replacements"].keys()))
    r.chatbot_artifacts = find_matches(text, PATTERNS["chatbot_artifacts"])
    r.hedging_phrases = find_matches(text, PATTERNS["hedging_phrases"])
    r.negative_parallelisms = find_matches(text, PATTERNS["negative_parallelisms"])
    r.rule_of_three = find_matches(text, PATTERNS["rule_of_three_patterns"])
    r.markdown_artifacts = find_matches(text, PATTERNS["markdown_artifacts"])
    r.citation_bugs = find_matches(text, PATTERNS["citation_bugs"])
    r.knowledge_cutoff = find_matches(text, PATTERNS["knowledge_cutoff"])
    # FIX: count the four curly-quote code points (U+201C/U+201D/U+2018/U+2019);
    # the previous character class had degraded into plain ASCII quotes.
    r.curly_quotes = len(re.findall(r"[\u201c\u201d\u2018\u2019]", text))
    # FIX: count the em dash code point (U+2014) plus the ASCII " -- " stand-in;
    # text.count("") returns len(text) + 1 and swamped the total score.
    r.em_dashes = text.count("\u2014") + text.count(" -- ")
    # Weighted issue total: hard artifacts (citations x5, chatbot text and
    # cutoff phrases x3, markdown x2) weigh more than soft word-choice signals;
    # em dashes only count once they exceed a small natural allowance.
    # NOTE(review): rule_of_three is collected but never scored — confirm intentional.
    r.total_issues = (
        sum(c for _, c in r.significance_inflation) + sum(c for _, c in r.notability_emphasis) +
        sum(c for _, c in r.superficial_analysis) + sum(c for _, c in r.promotional_language) +
        sum(c for _, c in r.vague_attributions) + sum(c for _, c in r.challenges_formula) +
        sum(c for _, c in r.ai_vocabulary) + sum(c for _, c in r.copula_avoidance) +
        sum(c for _, c in r.filler_phrases) + sum(c for _, c in r.chatbot_artifacts) * 3 +
        sum(c for _, c in r.hedging_phrases) + sum(c for _, c in r.negative_parallelisms) +
        sum(c for _, c in r.markdown_artifacts) * 2 + sum(c for _, c in r.citation_bugs) * 5 +
        sum(c for _, c in r.knowledge_cutoff) * 3 + r.curly_quotes + (r.em_dashes if r.em_dashes > 3 else 0)
    )
    # Issues per 100 words: short texts are judged by density, long ones by raw count.
    density = r.total_issues / max(r.word_count, 1) * 100
    if r.citation_bugs or r.knowledge_cutoff or r.chatbot_artifacts:
        # Any hard artifact is treated as near-certain machine output.
        r.ai_probability = "very high"
    elif density > 5 or r.total_issues > 30:
        r.ai_probability = "high"
    elif density > 2 or r.total_issues > 15:
        r.ai_probability = "medium"
    return r
def print_section(title: str, items: list, replacements: dict = None):
    """Print one report section: the title, then one line per (phrase, count) pair.

    When *replacements* maps a phrase to its suggested substitute, the line shows
    the suggestion; an empty suggestion is rendered as "(remove)". An empty
    *items* list prints nothing at all.
    """
    if not items:
        return
    print(f"{title}:")
    for phrase, count in items:
        if replacements and phrase in replacements:
            repl = replacements[phrase]
            # FIX: the " → " separator before the suggested replacement had been
            # dropped, producing unreadable lines like "old""new": 3x.
            arrow = f' → "{repl}"' if repl else " → (remove)"
            print(f"\"{phrase}\"{arrow}: {count}x")
        else:
            print(f"{phrase}: {count}x")
    print()
def print_report(r: DetectionResult):
    """Render a human-readable detection report for *r* on stdout."""
    icons = {"very high": "🔴", "high": "🟠", "medium": "🟡", "low": "🟢"}
    bar = "=" * 60
    print(f"\n{bar}")
    print(f"AI DETECTION SCAN - {r.total_issues} issues ({r.word_count} words)")
    print(f"AI Probability: {icons.get(r.ai_probability, '')} {r.ai_probability.upper()}")
    print(f"{bar}\n")
    # High-severity findings come first, each behind its own warning banner.
    # print_section is a no-op for empty lists, so only the banner needs guarding.
    critical = [
        ("⚠️ CRITICAL: CHATGPT CITATION BUGS", "Citation Artifacts", r.citation_bugs),
        ("⚠️ CRITICAL: KNOWLEDGE CUTOFF PHRASES", "Cutoff Phrases", r.knowledge_cutoff),
        ("⚠️ HIGH: CHATBOT ARTIFACTS", "Artifacts", r.chatbot_artifacts),
        ("⚠️ MARKDOWN DETECTED", "Markdown", r.markdown_artifacts),
    ]
    for banner, heading, items in critical:
        if items:
            print(banner)
            print_section(heading, items)
    # Ordinary categories, with replacement hints where a mapping exists.
    sections = [
        ("SIGNIFICANCE INFLATION", r.significance_inflation, None),
        ("PROMOTIONAL LANGUAGE", r.promotional_language, None),
        ("AI VOCABULARY", r.ai_vocabulary, None),
        ("SUPERFICIAL -ING", r.superficial_analysis, None),
        ("COPULA AVOIDANCE", r.copula_avoidance, PATTERNS["copula_avoidance"]),
        ("FILLER PHRASES", r.filler_phrases, PATTERNS["filler_replacements"]),
        ("VAGUE ATTRIBUTIONS", r.vague_attributions, None),
        ("CHALLENGES FORMULA", r.challenges_formula, None),
        ("HEDGING", r.hedging_phrases, None),
        ("NEGATIVE PARALLELISMS", r.negative_parallelisms, None),
        ("NOTABILITY EMPHASIS", r.notability_emphasis, None),
    ]
    for heading, items, repl in sections:
        print_section(heading, items, repl)
    if r.curly_quotes:
        print(f"CURLY QUOTES: {r.curly_quotes} (ChatGPT signature)\n")
    if r.em_dashes > 3:
        print(f"EM DASHES: {r.em_dashes} (excessive)\n")
    if r.total_issues == 0:
        print("✓ No AI patterns detected.\n")
def main():
    """CLI entry point: read text, run detection, and print in the requested format."""
    parser = argparse.ArgumentParser(description="Detect AI patterns in text")
    parser.add_argument("input", nargs="?", help="Input file (or stdin)")
    parser.add_argument("--json", "-j", action="store_true", help="JSON output")
    parser.add_argument("--score-only", "-s", action="store_true", help="Score and probability only")
    args = parser.parse_args()
    # Explicit UTF-8 so behavior does not depend on the platform's locale encoding.
    text = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read()
    result = detect(text)
    if args.json:
        # Machine-readable subset of the result; hit lists serialize as
        # [phrase, count] pairs.
        print(json.dumps({
            "total_issues": result.total_issues, "word_count": result.word_count,
            "ai_probability": result.ai_probability, "significance_inflation": result.significance_inflation,
            "promotional_language": result.promotional_language, "ai_vocabulary": result.ai_vocabulary,
            "chatbot_artifacts": result.chatbot_artifacts, "citation_bugs": result.citation_bugs,
            "filler_phrases": result.filler_phrases, "curly_quotes": result.curly_quotes, "em_dashes": result.em_dashes,
        }, indent=2, ensure_ascii=False))
    elif args.score_only:
        print(f"Issues: {result.total_issues} | Words: {result.word_count} | AI: {result.ai_probability}")
    else:
        print_report(result)


if __name__ == "__main__":
    main()

191
scripts/patterns.json Normal file
View File

@@ -0,0 +1,191 @@
{
"significance_inflation": [
"stands as", "serves as", "is a testament", "is a reminder",
"vital role", "significant role", "crucial role", "pivotal role",
"key role", "pivotal moment", "key moment", "key turning point",
"underscores its importance", "highlights its importance",
"underscores its significance", "highlights its significance",
"reflects broader", "symbolizing its ongoing", "symbolizing its enduring",
"symbolizing its lasting", "contributing to the", "setting the stage for",
"marking the", "shaping the", "represents a shift", "marks a shift",
"evolving landscape", "focal point", "indelible mark", "deeply rooted",
"enduring legacy", "rich tapestry", "broader movement"
],
"notability_emphasis": [
"independent coverage", "local media outlets", "regional media outlets",
"national media outlets", "music outlets", "business outlets", "tech outlets",
"profiled in", "written by a leading expert", "active social media presence",
"has been featured in", "has been cited in", "maintains a strong digital presence"
],
"superficial_analysis": [
"highlighting", "underscoring", "emphasizing", "ensuring",
"reflecting", "symbolizing", "contributing to", "cultivating",
"fostering", "encompassing", "showcasing", "valuable insights",
"align with", "aligns with", "resonate with", "resonates with"
],
"promotional_language": [
"boasts a", "boasts an", "vibrant", "rich cultural heritage",
"profound", "enhancing its", "exemplifies", "commitment to",
"natural beauty", "nestled", "in the heart of", "groundbreaking",
"renowned", "breathtaking", "must-visit", "stunning", "bustling",
"game-changing", "cutting-edge", "state-of-the-art", "world-class",
"best-in-class", "industry-leading", "innovative", "revolutionary"
],
"vague_attributions": [
"industry reports", "observers have cited", "experts argue",
"experts believe", "some critics argue", "several sources",
"several publications", "according to experts", "widely regarded",
"it is widely believed", "many believe", "some would say"
],
"challenges_formula": [
"despite its", "faces several challenges", "despite these challenges",
"challenges and legacy", "future outlook", "future prospects",
"looking ahead", "moving forward", "going forward"
],
"ai_vocabulary": [
"additionally", "align with", "crucial", "delve", "emphasizing",
"enduring", "enhance", "fostering", "garner", "highlight",
"interplay", "intricate", "intricacies", "key", "landscape",
"pivotal", "showcase", "showcasing", "tapestry", "testament",
"underscore", "underscores", "valuable", "vibrant", "nuanced",
"multifaceted", "paradigm", "synergy", "realm", "underpins",
"unraveling", "unveiling", "leveraging", "furthermore", "moreover",
"consequently", "subsequently", "henceforth", "thereby", "wherein",
"thereof", "whatsoever", "nevertheless", "notwithstanding"
],
"copula_avoidance": {
"serves as a": "is a",
"serves as an": "is an",
"serves as the": "is the",
"stands as a": "is a",
"stands as an": "is an",
"stands as the": "is the",
"marks a": "is a",
"marks an": "is an",
"marks the": "is the",
"represents a": "is a",
"represents an": "is an",
"represents the": "is the",
"boasts a": "has a",
"boasts an": "has an",
"boasts the": "has the",
"features a": "has a",
"features an": "has an",
"features the": "has the",
"offers a": "has a",
"offers an": "has an"
},
"filler_replacements": {
"in order to": "to",
"due to the fact that": "because",
"at this point in time": "now",
"at the present time": "now",
"has the ability to": "can",
"it is important to note that": "",
"it should be noted that": "",
"it is worth noting that": "",
"it is crucial to note that": "",
"it is critical to remember that": "",
"it goes without saying that": "",
"needless to say": "",
"Additionally,": "",
"Furthermore,": "",
"Moreover,": "",
"In conclusion,": "",
"To summarize,": "",
"In summary,": "",
"Overall,": "",
"utilize": "use",
"utilizes": "uses",
"utilizing": "using",
"utilization": "use",
"leverage": "use",
"leverages": "uses",
"leveraging": "using",
"facilitate": "help",
"facilitates": "helps",
"facilitating": "helping",
"implement": "add",
"implements": "adds",
"prioritize": "focus on",
"prioritizes": "focuses on",
"optimize": "improve",
"optimizes": "improves",
"streamline": "simplify",
"streamlines": "simplifies"
},
"chatbot_artifacts": [
"I hope this helps",
"Let me know if",
"Would you like me to",
"Great question",
"Excellent question",
"You're absolutely right",
"That's a great point",
"That's an excellent point",
"Certainly!",
"Of course!",
"Absolutely!",
"Happy to help",
"I'd be happy to",
"Feel free to",
"Don't hesitate to",
"Here is a",
"Here's a",
"I can help you with",
"As an AI",
"As a language model",
"As an AI language model"
],
"hedging_phrases": [
"it could potentially",
"it might possibly",
"arguably",
"it could be argued that",
"some would say",
"in some ways",
"to some extent",
"in certain respects",
"may vary",
"results may vary"
],
"negative_parallelisms": [
"not only", "but also",
"it's not just about", "it's about",
"it is not merely", "it is",
"not just", "but",
"no longer", "instead"
],
"rule_of_three_patterns": [
"innovation, inspiration, and",
"engage, educate, and",
"plan, execute, and",
"design, develop, and",
"research, develop, and",
"create, collaborate, and",
"learn, grow, and"
],
"markdown_artifacts": [
"**", "##", "###", "```",
"* **", "- **", "1. **"
],
"citation_bugs": [
"oaicite", "oai_citation", "contentReference",
"turn0search", "turn0image", "utm_source=chatgpt",
"utm_source=openai", "attached_file", "grok_card"
],
"knowledge_cutoff": [
"as of my last", "as of my knowledge",
"up to my last training", "based on available information",
"while specific details are limited",
"not widely available", "not widely documented",
"in the provided sources", "in available sources"
],
"curly_quotes": {
"\u201c": "\"",
"\u201d": "\"",
"\u2018": "'",
"\u2019": "'"
},
"em_dash_patterns": ["—", " — ", "-- "]
}

127
scripts/transform.py Normal file
View File

@@ -0,0 +1,127 @@
#!/usr/bin/env python3
"""Transform AI text to bypass detection."""
import argparse, json, re, sys
from pathlib import Path
# Shared pattern/replacement tables; loaded once when the module is imported.
SCRIPT_DIR = Path(__file__).parent
PATTERNS = json.loads((SCRIPT_DIR / "patterns.json").read_text())
def replace_bounded(text: str, old: str, new: str) -> tuple[str, int]:
    """Case-insensitively replace *old* with *new*; return (new_text, hit_count).

    Multi-word phrases (and comma-suffixed entries) match anywhere; single
    words get \\b anchors so substrings of longer words are left untouched.
    """
    escaped = re.escape(old)
    if " " in old or old.endswith(","):
        rx = re.compile(escaped, re.IGNORECASE)
    else:
        rx = re.compile(rf"\b{escaped}\b", re.IGNORECASE)
    hits = rx.findall(text)
    if not hits:
        return text, 0
    return rx.sub(new, text), len(hits)
def apply_replacements(text: str, replacements: dict) -> tuple[str, list]:
    """Apply every old→new phrase replacement from *replacements* to *text*.

    Returns the rewritten text plus one human-readable change-log entry per
    phrase that actually occurred; an empty replacement is logged as a removal.
    """
    changes = []
    for old, new in replacements.items():
        text, count = replace_bounded(text, old, new)
        if count:
            # FIX: the " → " separator between the old and new phrase had been
            # dropped, producing log entries like "old""new"; restore the arrow
            # used by the sibling tools' output.
            changes.append(f'"{old}" → "{new}"' if new else f'"{old}" removed')
    return text, changes
def fix_quotes(text: str) -> tuple[str, bool]:
    """Replace curly quotes with straight ASCII ones; report whether anything changed."""
    before = text
    for curly, straight in PATTERNS["curly_quotes"].items():
        text = text.replace(curly, straight)
    changed = text != before
    return text, changed
def remove_chatbot_sentences(text: str) -> tuple[str, list]:
    """Delete whole sentences containing chatbot boilerplate (e.g. "I hope this helps")."""
    changes = []
    for artifact in PATTERNS["chatbot_artifacts"]:
        # Expand the match to the surrounding sentence: everything up to the
        # nearest sentence terminator or newline on either side.
        sentence_rx = re.compile(
            r"[^.!?\n]*" + re.escape(artifact) + r"[^.!?\n]*[.!?]?\s*",
            re.IGNORECASE,
        )
        if sentence_rx.search(text):
            changes.append(f'Removed "{artifact}" sentence')
            text = sentence_rx.sub("", text)
    return text, changes
def strip_markdown(text: str) -> tuple[str, list]:
    """Remove Markdown bold markers, ATX headers, and code fences from *text*."""
    changes = []
    if "**" in text:
        # Keep the bold span's content, drop the asterisks.
        text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
        changes.append("Stripped bold")
    header_rx = re.compile(r'^#{1,6}\s', re.MULTILINE)
    if header_rx.search(text):
        text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
        changes.append("Stripped headers")
    if "```" in text:
        # Drop fence lines (with optional language tag); keep the code itself.
        text = re.sub(r'```\w*\n?', '', text)
        changes.append("Stripped code blocks")
    return text, changes
def reduce_em_dashes(text: str) -> tuple[str, int]:
    """Turn em dashes (and the ASCII " -- " stand-in) into commas.

    Returns the rewritten text and the number of dashes that were present.
    """
    # FIX: count the em dash code point (U+2014); text.count("") counts the
    # empty string and returns len(text) + 1, wildly inflating the tally.
    count = text.count("\u2014") + text.count(" -- ")
    text = re.sub(r"\s*\u2014\s*", ", ", text)
    text = re.sub(r"\s+--\s+", ", ", text)
    return text, count
def remove_citations(text: str) -> tuple[str, list]:
    """Strip ChatGPT/OpenAI citation artifacts (oai markers, turn0 refs, UTM tags)."""
    changes = []
    artifact_patterns = (
        (r'\[oai_citation:\d+[^\]]*\]\([^)]+\)', "oai_citation"),
        (r':contentReference\[oaicite:\d+\]\{[^}]+\}', "contentReference"),
        (r'turn0search\d+', "turn0search"),
        (r'turn0image\d+', "turn0image"),
        (r'\?utm_source=(chatgpt\.com|openai)', "ChatGPT UTM"),
    )
    for rx, label in artifact_patterns:
        # subn deletes every occurrence and reports how many there were.
        text, hits = re.subn(rx, '', text)
        if hits:
            changes.append(f"Removed {label}")
    return text, changes
def simplify_ing(text: str) -> tuple[str, list]:
    """Cut trailing "-ing" commentary clauses (", highlighting ..." etc.) to a period."""
    changes = []
    ing_words = ("highlighting", "underscoring", "emphasizing", "showcasing", "fostering")
    for word in ing_words:
        # Optional leading comma, the -ing word, then the clause up to the
        # next comma or period (terminator included in the match).
        clause_rx = re.compile(rf',?\s*{word}\s+[^,.]+[,.]', re.IGNORECASE)
        if clause_rx.search(text):
            text = clause_rx.sub('. ', text)
            changes.append(f"Simplified {word} clause")
    return text, changes
def clean(text: str) -> str:
text = re.sub(r" +", " ", text)
text = re.sub(r"\n{3,}", "\n\n", text)
text = re.sub(r",\s*,", ",", text)
text = re.sub(r"(^|[.!?]\s+)([a-z])", lambda m: m.group(1) + m.group(2).upper(), text)
return text.strip()
def transform(text: str, aggressive: bool = False) -> tuple[str, list]:
    """Run the full rewrite pipeline over *text*; return (clean_text, change_log)."""
    log = []
    # Fixed order: structural artifacts first, then phrase-level rewrites.
    for step in (remove_citations, strip_markdown, remove_chatbot_sentences):
        text, step_changes = step(text)
        log.extend(step_changes)
    for table in (PATTERNS["copula_avoidance"], PATTERNS["filler_replacements"]):
        text, step_changes = apply_replacements(text, table)
        log.extend(step_changes)
    text, quotes_changed = fix_quotes(text)
    if quotes_changed:
        log.append("Fixed curly quotes")
    if aggressive:
        text, step_changes = simplify_ing(text)
        log.extend(step_changes)
    text, dash_count = reduce_em_dashes(text)
    if dash_count > 2:
        log.append(f"Replaced {dash_count} em dashes")
    return clean(text), log
def main():
    """CLI entry point: read text, transform it, and write or print the result."""
    parser = argparse.ArgumentParser(description="Transform AI text to human-like")
    parser.add_argument("input", nargs="?", help="Input file (or stdin)")
    parser.add_argument("-o", "--output", help="Output file")
    parser.add_argument("-a", "--aggressive", action="store_true", help="Aggressive mode")
    parser.add_argument("-q", "--quiet", action="store_true", help="Suppress change log")
    args = parser.parse_args()
    # Explicit UTF-8 so behavior does not depend on the platform's locale encoding.
    text = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read()
    result, changes = transform(text, aggressive=args.aggressive)
    # The change log goes to stderr so stdout stays cleanly pipeable.
    if not args.quiet and changes:
        print(f"CHANGES ({len(changes)}):", file=sys.stderr)
        for c in changes:
            print(c, file=sys.stderr)
    if args.output:
        Path(args.output).write_text(result, encoding="utf-8")
        if not args.quiet:
            print(f"{args.output}", file=sys.stderr)
    else:
        print(result)


if __name__ == "__main__":
    main()