commit 673a554c435fc535ec797d87ebd5d701a6edb29f Author: zlei9 Date: Sun Mar 29 13:06:50 2026 +0800 Initial commit with translated description diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..08d8d26 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,182 @@ +--- +name: humanize-ai-text +description: "人性化AI生成的文本以绕过检测。" +allowed-tools: + - Read + - Write + - StrReplace + - Glob +--- + +# Humanize AI Text + +Comprehensive CLI for detecting and transforming AI-generated text to bypass detectors. Based on [Wikipedia's Signs of AI Writing](https://en.wikipedia.org/wiki/Wikipedia:Signs_of_AI_writing). + +## Quick Start + +```bash +# Detect AI patterns +python scripts/detect.py text.txt + +# Transform to human-like +python scripts/transform.py text.txt -o clean.txt + +# Compare before/after +python scripts/compare.py text.txt -o clean.txt +``` + +--- + +## Detection Categories + +The analyzer checks for **16 pattern categories** from Wikipedia's guide: + +### Critical (Immediate AI Detection) +| Category | Examples | +|----------|----------| +| Citation Bugs | `oaicite`, `turn0search`, `contentReference` | +| Knowledge Cutoff | "as of my last training", "based on available information" | +| Chatbot Artifacts | "I hope this helps", "Great question!", "As an AI" | +| Markdown | `**bold**`, `## headers`, ``` code blocks ``` | + +### High Signal +| Category | Examples | +|----------|----------| +| AI Vocabulary | delve, tapestry, landscape, pivotal, underscore, foster | +| Significance Inflation | "serves as a testament", "pivotal moment", "indelible mark" | +| Promotional Language | vibrant, groundbreaking, nestled, breathtaking | +| Copula Avoidance | "serves as" instead of "is", "boasts" instead of "has" | + +### Medium Signal +| Category | Examples | +|----------|----------| +| Superficial -ing | "highlighting the importance", "fostering collaboration" | +| Filler Phrases | "in order to", "due to the fact that", "Additionally," | +| Vague Attributions | "experts 
believe", "industry reports suggest" | +| Challenges Formula | "Despite these challenges", "Future outlook" | + +### Style Signal +| Category | Examples | +|----------|----------| +| Curly Quotes | "" instead of "" (ChatGPT signature) | +| Em Dash Overuse | Excessive use of — for emphasis | +| Negative Parallelisms | "Not only... but also", "It's not just... it's" | +| Rule of Three | Forced triplets like "innovation, inspiration, and insight" | + +--- + +## Scripts + +### detect.py — Scan for AI Patterns + +```bash +python scripts/detect.py essay.txt +python scripts/detect.py essay.txt -j # JSON output +python scripts/detect.py essay.txt -s # score only +echo "text" | python scripts/detect.py +``` + +**Output:** +- Issue count and word count +- AI probability (low/medium/high/very high) +- Breakdown by category +- Auto-fixable patterns marked + +### transform.py — Rewrite Text + +```bash +python scripts/transform.py essay.txt +python scripts/transform.py essay.txt -o output.txt +python scripts/transform.py essay.txt -a # aggressive +python scripts/transform.py essay.txt -q # quiet +``` + +**Auto-fixes:** +- Citation bugs (oaicite, turn0search) +- Markdown (**, ##, ```) +- Chatbot sentences +- Copula avoidance → "is/has" +- Filler phrases → simpler forms +- Curly → straight quotes + +**Aggressive (-a):** +- Simplifies -ing clauses +- Reduces em dashes + +### compare.py — Before/After Analysis + +```bash +python scripts/compare.py essay.txt +python scripts/compare.py essay.txt -a -o clean.txt +``` + +Shows side-by-side detection scores before and after transformation + +--- + +## Workflow + +1. **Scan** for detection risk: + ```bash + python scripts/detect.py document.txt + ``` + +2. **Transform** with comparison: + ```bash + python scripts/compare.py document.txt -o document_v2.txt + ``` + +3. **Verify** improvement: + ```bash + python scripts/detect.py document_v2.txt -s + ``` + +4. 
**Manual review** for AI vocabulary and promotional language (requires judgment) + +--- + +## AI Probability Scoring + +| Rating | Criteria | +|--------|----------| +| Very High | Citation bugs, knowledge cutoff, or chatbot artifacts present | +| High | >30 issues OR >5% issue density | +| Medium | >15 issues OR >2% issue density | +| Low | ≤15 issues AND ≤2% density | + +--- + +## Customizing Patterns + +Edit `scripts/patterns.json` to add/modify: +- `ai_vocabulary` — words to flag +- `significance_inflation` — puffery phrases +- `promotional_language` — marketing speak +- `copula_avoidance` — phrase → replacement +- `filler_replacements` — phrase → simpler form +- `chatbot_artifacts` — phrases triggering sentence removal + +--- + +## Batch Processing + +```bash +# Scan all files +for f in *.txt; do + echo "=== $f ===" + python scripts/detect.py "$f" -s +done + +# Transform all markdown +for f in *.md; do + python scripts/transform.py "$f" -a -o "${f%.md}_clean.md" -q +done +``` + +--- + +## Reference + +Based on Wikipedia's [Signs of AI Writing](https://en.wikipedia.org/wiki/Wikipedia:Signs_of_AI_writing), maintained by WikiProject AI Cleanup. Patterns documented from thousands of AI-generated text examples. + +Key insight: "LLMs use statistical algorithms to guess what should come next. The result tends toward the most statistically likely result that applies to the widest variety of cases." 
def main():
    """Score a text, transform it, score it again, and print the comparison.

    Reads the input file (or stdin when no path is given), runs ``detect`` on
    it, rewrites it with ``transform``, runs ``detect`` on the result, and
    prints both scores side by side followed by the list of transformations
    that were applied.  With ``-o/--output`` the transformed text is also
    written to that path.
    """
    parser = argparse.ArgumentParser(description="Compare AI detection before/after transformation")
    parser.add_argument("input", nargs="?", help="Input file (or stdin)")
    parser.add_argument("-a", "--aggressive", action="store_true", help="Use aggressive mode")
    parser.add_argument("-o", "--output", help="Save transformed text to file")
    args = parser.parse_args()

    # Decode explicitly as UTF-8.  The patterns this tool looks for (curly
    # quotes, em dashes) are non-ASCII, so relying on the locale's default
    # encoding would mis-decode them on non-UTF-8 systems.
    text = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read()

    before = detect(text)
    transformed, changes = transform(text, aggressive=args.aggressive)
    after = detect(transformed)

    icons = {"very high": "🔴", "high": "🟠", "medium": "🟡", "low": "🟢"}

    print(f"\n{'='*60}")
    print("BEFORE → AFTER COMPARISON")
    print(f"{'='*60}\n")

    print(f"{'Metric':<25} {'Before':<15} {'After':<15} {'Change':<10}")
    print(f"{'-'*60}")

    # Only positive deltas get an explicit sign; negatives carry their own.
    issue_diff = after.total_issues - before.total_issues
    issue_sign = "+" if issue_diff > 0 else ""
    print(f"{'Issues':<25} {before.total_issues:<15} {after.total_issues:<15} {issue_sign}{issue_diff}")

    print(f"{'AI Probability':<25} {icons.get(before.ai_probability,'')} {before.ai_probability:<12} {icons.get(after.ai_probability,'')} {after.ai_probability:<12}")
    print(f"{'Word Count':<25} {before.word_count:<15} {after.word_count:<15} {after.word_count - before.word_count:+}")

    if changes:
        print(f"\n{'='*60}")
        print(f"TRANSFORMATIONS ({len(changes)})")
        print(f"{'='*60}")
        for c in changes:
            print(f" • {c}")

    reduction = before.total_issues - after.total_issues
    if reduction > 0:
        pct = (reduction / before.total_issues * 100) if before.total_issues else 0
        print(f"\n✓ Reduced {reduction} issues ({pct:.0f}% improvement)")
    elif reduction < 0:
        print(f"\n⚠ Issues increased by {-reduction}")
    else:
        print("\n— No change in issue count")

    if args.output:
        # Same encoding as the read side so the round trip is lossless.
        Path(args.output).write_text(transformed, encoding="utf-8")
        print(f"\n→ Saved to {args.output}")
def find_matches(text: str, patterns: list) -> list:
    """Count case-insensitive occurrences of each pattern in *text*.

    A pattern edge that is a letter must not touch another letter in the
    text, so ``"key"`` is not found inside ``"monkey"`` and ``"underscore"``
    is not counted a second time inside ``"underscores"``.  Edges that are
    digits or punctuation are matched literally, so markers such as ``"**"``
    and prefixes such as ``"turn0search"`` (followed by a number in real
    text) still match.

    Note that a listed word no longer matches its inflected forms: add
    ``"delves"`` to the pattern list if ``"delve"`` alone is not enough.

    Args:
        text: The text to scan.
        patterns: Literal phrases to look for (not regular expressions).

    Returns:
        ``(pattern, count)`` tuples for every pattern found at least once,
        most frequent first; ties keep the order of *patterns*.
    """
    letter = r"[^\W\d_]"  # any Unicode letter
    found = []
    for phrase in patterns:
        if not phrase:
            # An empty pattern would "match" at every position.
            continue
        regex = re.escape(phrase)
        if phrase[0].isalpha():
            regex = rf"(?<!{letter})" + regex
        if phrase[-1].isalpha():
            regex += rf"(?!{letter})"
        count = len(re.findall(regex, text, re.IGNORECASE))
        if count:
            found.append((phrase, count))
    return sorted(found, key=lambda item: -item[1])
def print_section(title: str, items: list, replacements: dict = None):
    """Print one report section listing each matched phrase and its count.

    Nothing is printed when *items* is empty.  When *replacements* maps a
    phrase to a substitute, the line also shows that substitute, or
    ``(remove)`` when the substitute is the empty string.

    Args:
        title: Section heading, printed followed by a colon.
        items: ``(phrase, count)`` pairs to list.
        replacements: Optional phrase -> replacement mapping.
    """
    if items:
        lookup = replacements or {}
        lines = [f"{title}:"]
        for phrase, count in items:
            if phrase in lookup:
                target = lookup[phrase]
                suffix = f' → "{target}"' if target else " → (remove)"
                lines.append(f' • "{phrase}"{suffix}: {count}x')
            else:
                lines.append(f" • {phrase}: {count}x")
        # The trailing empty entry yields the blank line that ends a section.
        lines.append("")
        print("\n".join(lines))
def main():
    """Command-line entry point: scan a file (or stdin) and report AI patterns.

    Output modes:
      * default            - full human-readable report via ``print_report``
      * ``-s/--score-only`` - one summary line
      * ``-j/--json``       - every field of the detection result as JSON
    """
    parser = argparse.ArgumentParser(description="Detect AI patterns in text")
    parser.add_argument("input", nargs="?", help="Input file (or stdin)")
    parser.add_argument("--json", "-j", action="store_true", help="JSON output")
    parser.add_argument("--score-only", "-s", action="store_true", help="Score and probability only")
    args = parser.parse_args()

    # Decode explicitly as UTF-8: curly quotes and em dashes are among the
    # things being counted, and a locale-default decode can garble them.
    text = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read()
    result = detect(text)

    if args.json:
        # Local import: the module header only pulls in ``dataclass``/``field``.
        from dataclasses import asdict

        # Emit every category, not a hand-picked subset, so a consumer can
        # see e.g. the knowledge-cutoff hits behind a "very high" rating.
        # The three summary keys stay first, as in the previous output.
        payload = asdict(result)
        summary = {key: payload.pop(key) for key in ("total_issues", "word_count", "ai_probability")}
        print(json.dumps({**summary, **payload}, indent=2))
    elif args.score_only:
        print(f"Issues: {result.total_issues} | Words: {result.word_count} | AI: {result.ai_probability}")
    else:
        print_report(result)
"game-changing", "cutting-edge", "state-of-the-art", "world-class", + "best-in-class", "industry-leading", "innovative", "revolutionary" + ], + "vague_attributions": [ + "industry reports", "observers have cited", "experts argue", + "experts believe", "some critics argue", "several sources", + "several publications", "according to experts", "widely regarded", + "it is widely believed", "many believe", "some would say" + ], + "challenges_formula": [ + "despite its", "faces several challenges", "despite these challenges", + "challenges and legacy", "future outlook", "future prospects", + "looking ahead", "moving forward", "going forward" + ], + "ai_vocabulary": [ + "additionally", "align with", "crucial", "delve", "emphasizing", + "enduring", "enhance", "fostering", "garner", "highlight", + "interplay", "intricate", "intricacies", "key", "landscape", + "pivotal", "showcase", "showcasing", "tapestry", "testament", + "underscore", "underscores", "valuable", "vibrant", "nuanced", + "multifaceted", "paradigm", "synergy", "realm", "underpins", + "unraveling", "unveiling", "leveraging", "furthermore", "moreover", + "consequently", "subsequently", "henceforth", "thereby", "wherein", + "thereof", "whatsoever", "nevertheless", "notwithstanding" + ], + "copula_avoidance": { + "serves as a": "is a", + "serves as an": "is an", + "serves as the": "is the", + "stands as a": "is a", + "stands as an": "is an", + "stands as the": "is the", + "marks a": "is a", + "marks an": "is an", + "marks the": "is the", + "represents a": "is a", + "represents an": "is an", + "represents the": "is the", + "boasts a": "has a", + "boasts an": "has an", + "boasts the": "has the", + "features a": "has a", + "features an": "has an", + "features the": "has the", + "offers a": "has a", + "offers an": "has an" + }, + "filler_replacements": { + "in order to": "to", + "due to the fact that": "because", + "at this point in time": "now", + "at the present time": "now", + "has the ability to": "can", + "it is 
important to note that": "", + "it should be noted that": "", + "it is worth noting that": "", + "it is crucial to note that": "", + "it is critical to remember that": "", + "it goes without saying that": "", + "needless to say": "", + "Additionally,": "", + "Furthermore,": "", + "Moreover,": "", + "In conclusion,": "", + "To summarize,": "", + "In summary,": "", + "Overall,": "", + "utilize": "use", + "utilizes": "uses", + "utilizing": "using", + "utilization": "use", + "leverage": "use", + "leverages": "uses", + "leveraging": "using", + "facilitate": "help", + "facilitates": "helps", + "facilitating": "helping", + "implement": "add", + "implements": "adds", + "prioritize": "focus on", + "prioritizes": "focuses on", + "optimize": "improve", + "optimizes": "improves", + "streamline": "simplify", + "streamlines": "simplifies" + }, + "chatbot_artifacts": [ + "I hope this helps", + "Let me know if", + "Would you like me to", + "Great question", + "Excellent question", + "You're absolutely right", + "That's a great point", + "That's an excellent point", + "Certainly!", + "Of course!", + "Absolutely!", + "Happy to help", + "I'd be happy to", + "Feel free to", + "Don't hesitate to", + "Here is a", + "Here's a", + "I can help you with", + "As an AI", + "As a language model", + "As an AI language model" + ], + "hedging_phrases": [ + "it could potentially", + "it might possibly", + "arguably", + "it could be argued that", + "some would say", + "in some ways", + "to some extent", + "in certain respects", + "may vary", + "results may vary" + ], + "negative_parallelisms": [ + "not only", "but also", + "it's not just about", "it's about", + "it is not merely", "it is", + "not just", "but", + "no longer", "instead" + ], + "rule_of_three_patterns": [ + "innovation, inspiration, and", + "engage, educate, and", + "plan, execute, and", + "design, develop, and", + "research, develop, and", + "create, collaborate, and", + "learn, grow, and" + ], + "markdown_artifacts": [ + "**", "##", 
"###", "```", + "* **", "- **", "1. **" + ], + "citation_bugs": [ + "oaicite", "oai_citation", "contentReference", + "turn0search", "turn0image", "utm_source=chatgpt", + "utm_source=openai", "attached_file", "grok_card" + ], + "knowledge_cutoff": [ + "as of my last", "as of my knowledge", + "up to my last training", "based on available information", + "while specific details are limited", + "not widely available", "not widely documented", + "in the provided sources", "in available sources" + ], + "curly_quotes": { + "\u201c": "\"", + "\u201d": "\"", + "\u2018": "'", + "\u2019": "'" + }, + "em_dash_patterns": ["—", " — ", "-- "] +} diff --git a/scripts/transform.py b/scripts/transform.py new file mode 100644 index 0000000..dd414f6 --- /dev/null +++ b/scripts/transform.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +"""Transform AI text to bypass detection.""" +import argparse, json, re, sys +from pathlib import Path + +SCRIPT_DIR = Path(__file__).parent +PATTERNS = json.loads((SCRIPT_DIR / "patterns.json").read_text()) + +def replace_bounded(text: str, old: str, new: str) -> tuple[str, int]: + pattern = re.compile(re.escape(old), re.IGNORECASE) if " " in old or old.endswith(",") else re.compile(r"\b" + re.escape(old) + r"\b", re.IGNORECASE) + matches = pattern.findall(text) + return pattern.sub(new, text) if matches else text, len(matches) + +def apply_replacements(text: str, replacements: dict) -> tuple[str, list]: + changes = [] + for old, new in replacements.items(): + text, count = replace_bounded(text, old, new) + if count: + changes.append(f'"{old}" → "{new}"' if new else f'"{old}" removed') + return text, changes + +def fix_quotes(text: str) -> tuple[str, bool]: + original = text + for old, new in PATTERNS["curly_quotes"].items(): + text = text.replace(old, new) + return text, text != original + +def remove_chatbot_sentences(text: str) -> tuple[str, list]: + changes = [] + for artifact in PATTERNS["chatbot_artifacts"]: + pattern = re.compile(r"[^.!?\n]*" + 
def remove_citations(text: str) -> tuple[str, list]:
    """Strip ChatGPT citation artifacts and tracking parameters from *text*.

    Removes ``[oai_citation:N...](url)`` links, ``:contentReference[oaicite:N]{...}``
    markers, ``turn0searchN`` / ``turn0imageN`` tokens and the
    ``utm_source=chatgpt.com`` / ``utm_source=openai`` query parameter.

    The UTM parameter is removed wherever it sits in the query string.  When
    it is the first of several parameters, the ``?`` is kept so the rest of
    the query stays valid (``/x?utm_source=chatgpt.com&y=1`` -> ``/x?y=1``).

    Returns:
        ``(cleaned_text, changes)`` where *changes* holds one
        ``"Removed <kind>"`` entry per kind of artifact that was found.
    """
    utm = r'utm_source=(?:chatgpt\.com|openai)'
    rules = [
        (r'\[oai_citation:\d+[^\]]*\]\([^)]+\)', '', "oai_citation"),
        (r':contentReference\[oaicite:\d+\]\{[^}]+\}', '', "contentReference"),
        (r'turn0search\d+', '', "turn0search"),
        (r'turn0image\d+', '', "turn0image"),
        # First parameter with more following: drop it but keep the "?".
        (r'\?' + utm + r'&', '?', "ChatGPT UTM"),
        # Only parameter, or a later "&"-joined one: drop it with its separator.
        (r'[?&]' + utm, '', "ChatGPT UTM"),
    ]
    changes = []
    for pattern, replacement, name in rules:
        text, hits = re.subn(pattern, replacement, text)
        label = f"Removed {name}"
        # Two rules share the UTM label; report it once.
        if hits and label not in changes:
            changes.append(label)
    return text, changes
', text) + changes.append(f"Simplified {word} clause") + return text, changes + +def clean(text: str) -> str: + text = re.sub(r" +", " ", text) + text = re.sub(r"\n{3,}", "\n\n", text) + text = re.sub(r",\s*,", ",", text) + text = re.sub(r"(^|[.!?]\s+)([a-z])", lambda m: m.group(1) + m.group(2).upper(), text) + return text.strip() + +def transform(text: str, aggressive: bool = False) -> tuple[str, list]: + all_changes = [] + text, changes = remove_citations(text); all_changes.extend(changes) + text, changes = strip_markdown(text); all_changes.extend(changes) + text, changes = remove_chatbot_sentences(text); all_changes.extend(changes) + text, changes = apply_replacements(text, PATTERNS["copula_avoidance"]); all_changes.extend(changes) + text, changes = apply_replacements(text, PATTERNS["filler_replacements"]); all_changes.extend(changes) + text, fixed = fix_quotes(text) + if fixed: + all_changes.append("Fixed curly quotes") + if aggressive: + text, changes = simplify_ing(text); all_changes.extend(changes) + text, count = reduce_em_dashes(text) + if count > 2: + all_changes.append(f"Replaced {count} em dashes") + return clean(text), all_changes + +def main(): + parser = argparse.ArgumentParser(description="Transform AI text to human-like") + parser.add_argument("input", nargs="?", help="Input file (or stdin)") + parser.add_argument("-o", "--output", help="Output file") + parser.add_argument("-a", "--aggressive", action="store_true", help="Aggressive mode") + parser.add_argument("-q", "--quiet", action="store_true", help="Suppress change log") + args = parser.parse_args() + + text = Path(args.input).read_text() if args.input else sys.stdin.read() + result, changes = transform(text, aggressive=args.aggressive) + + if not args.quiet and changes: + print(f"CHANGES ({len(changes)}):", file=sys.stderr) + for c in changes: + print(f" • {c}", file=sys.stderr) + + if args.output: + Path(args.output).write_text(result) + if not args.quiet: + print(f"→ {args.output}", 
file=sys.stderr) + else: + print(result) + +if __name__ == "__main__": + main()