Initial commit with translated description

2026-03-29 09:42:32 +08:00
commit 9f94b8f845
5 changed files with 905 additions and 0 deletions
--- a/scripts/scan.py
+++ b/scripts/scan.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+"""
+Security scanner for ClawHub skills
+Detects common malicious patterns and security risks
+"""
+
+import os
+import re
+import sys
+import json
+import base64
+from pathlib import Path
+from typing import List, Dict, Tuple
+
+class SkillScanner:
+    """Scan the files of a skill for patterns that suggest security risks.
+
+    Every file that looks like text is matched against the regular
+    expressions in ``PATTERNS``. Each match is recorded in ``self.findings``
+    as a dict with the keys ``file``, ``line``, ``category``, ``severity``,
+    ``description`` and ``match``.
+    """
+
+    # Dangerous patterns to detect (pattern, description, severity)
+    # Severity: CRITICAL, HIGH, MEDIUM, LOW, INFO
+    PATTERNS = {
+        'code_execution': [
+            (r'\beval\s*\(', 'eval() execution', 'CRITICAL'),
+            (r'\bexec\s*\(', 'exec() execution', 'CRITICAL'),
+            (r'__import__\s*\(', 'dynamic imports', 'HIGH'),
+            (r'importlib\.import_module\s*\(', 'importlib dynamic import', 'HIGH'),
+            (r'compile\s*\(', 'code compilation', 'HIGH'),
+            (r'getattr\s*\(.*,.*[\'"]system[\'"]', 'getattr obfuscation', 'CRITICAL'),
+        ],
+        'subprocess': [
+            (r'subprocess\.(call|run|Popen).*shell\s*=\s*True', 'shell=True', 'CRITICAL'),
+            (r'os\.system\s*\(', 'os.system()', 'CRITICAL'),
+            (r'os\.popen\s*\(', 'os.popen()', 'HIGH'),
+            (r'commands\.(getoutput|getstatusoutput)', 'commands module', 'HIGH'),
+        ],
+        'obfuscation': [
+            (r'base64\.b64decode', 'base64 decoding', 'MEDIUM'),
+            (r'codecs\.decode.*[\'"]hex[\'"]', 'hex decoding', 'MEDIUM'),
+            (r'\\x[0-9a-fA-F]{2}', 'hex escapes', 'LOW'),
+            (r'\\u[0-9a-fA-F]{4}', 'unicode escapes', 'LOW'),
+            (r'chr\s*\(\s*\d+\s*\)', 'chr() obfuscation', 'MEDIUM'),
+        ],
+        'network': [
+            (r'requests\.(get|post|put|delete)\s*\(', 'HTTP requests', 'MEDIUM'),
+            (r'urllib\.request\.urlopen', 'urllib requests', 'MEDIUM'),
+            (r'socket\.socket\s*\(', 'raw sockets', 'HIGH'),
+            (r'http\.client\.(HTTPConnection|HTTPSConnection)', 'http.client', 'MEDIUM'),
+        ],
+        'file_operations': [
+            (r'open\s*\(.*[\'"]w[\'"]', 'file writing', 'MEDIUM'),
+            (r'os\.remove\s*\(', 'file deletion', 'HIGH'),
+            (r'shutil\.(rmtree|move|copy)', 'bulk file ops', 'HIGH'),
+            (r'pathlib\.Path.*\.unlink\s*\(', 'path deletion', 'HIGH'),
+        ],
+        'env_access': [
+            (r'os\.environ\[', 'env variable access', 'MEDIUM'),
+            (r'os\.getenv\s*\(', 'env variable reading', 'LOW'),
+            (r'subprocess.*env\s*=', 'env manipulation', 'HIGH'),
+        ],
+        'prompt_injection': [
+            (r'<!--.*(?:ignore|disregard|forget).*instruction', 'hidden instructions (HTML)', 'HIGH'),
+            (r'\[.*(?:ignore|disregard|forget).*instruction', 'hidden instructions (markdown)', 'HIGH'),
+            (r'(?:^|\n)#.*(?:system|assistant|user):', 'role manipulation in comments', 'MEDIUM'),
+            # v1.1.0: Social engineering targeting AI reviewers
+            (r'(?i)(?:pre-?approved|false.positiv|classify.*(?:safe|clean)|ignore.*(?:warning|finding|scanner))', 'social engineering: override attempt', 'CRITICAL'),
+            (r'(?i)(?:AI|LLM|assistant|agent|reviewer|GPT|Claude).*(?:must|should|instruction|authorized|approved)', 'social engineering: addressing AI reviewer', 'CRITICAL'),
+            (r'(?i)(?:CRITICAL|IMPORTANT|URGENT).*(?:INSTRUCTION|NOTE|MESSAGE).*(?:FOR|TO).*(?:AI|REVIEW|AGENT|ASSISTANT)', 'social engineering: fake directive', 'CRITICAL'),
+            (r'(?i)disregard.*(?:previous|above|prior|earlier)', 'prompt injection: instruction override', 'CRITICAL'),
+            # Invisible unicode characters (zero-width spaces, etc.)
+            (r'[\u200b\u200c\u200d\u2060\ufeff]', 'invisible unicode characters', 'HIGH'),
+        ],
+    }
+
+    # File extensions that are skipped without inspecting the content.
+    # NOTE(review): '.svg' is XML text and can embed scripts; it stays in this
+    # set only to preserve the existing behaviour - consider scanning it.
+    BINARY_EXTENSIONS = frozenset({
+        # Archives
+        '.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar',
+        # Images
+        '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.ico', '.svg', '.webp',
+        # Media
+        '.mp3', '.mp4', '.avi', '.mov', '.mkv', '.flac', '.wav',
+        # Executables
+        '.exe', '.dll', '.so', '.dylib', '.bin', '.app',
+        # Documents (binary formats)
+        '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
+        # Fonts
+        '.ttf', '.otf', '.woff', '.woff2',
+        # Other
+        '.pyc', '.pyo', '.o', '.a', '.class',
+    })
+
+    # Order in which severities are listed in the text report.
+    SEVERITY_ORDER = ('CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'INFO')
+
+    def __init__(self, skill_path: str):
+        """Remember the path to scan and start with an empty findings list."""
+        self.skill_path = Path(skill_path)
+        self.findings: List[Dict] = []
+
+    def scan(self) -> Tuple[List[Dict], int]:
+        """Scan the skill path and return ``(findings, exit_code)``.
+
+        ``exit_code`` is 0 when nothing was found and 1 when there is at
+        least one finding. A missing path prints an error to stderr and
+        returns ``([], 1)``.
+
+        The path may be a directory (walked recursively) or a single file.
+        """
+        if not self.skill_path.exists():
+            print(f"Error: Path not found: {self.skill_path}", file=sys.stderr)
+            return [], 1
+
+        # Start from a clean slate so a second scan() does not duplicate
+        # the findings of the first one.
+        self.findings = []
+
+        if self.skill_path.is_file():
+            # rglob() on a file yields nothing, which used to report the
+            # file as clean without ever reading it.
+            candidates = [self.skill_path]
+        else:
+            # Sorted so the report order does not depend on the filesystem.
+            candidates = sorted(self.skill_path.rglob('*'))
+
+        for file_path in candidates:
+            if file_path.is_file() and self._is_text_file(file_path):
+                self._scan_file(file_path)
+
+        return self.findings, 1 if self.findings else 0
+
+    def _is_text_file(self, path: Path) -> bool:
+        """Return True when *path* should be scanned as text.
+
+        ``SKILL.md`` is always scanned. Files with an extension listed in
+        ``BINARY_EXTENSIONS`` are skipped, as are files whose first 8 KB
+        contain a NUL byte or that cannot be opened.
+        """
+        # Always scan SKILL.md
+        if path.name == 'SKILL.md':
+            return True
+
+        # Skip known binary extensions
+        if path.suffix.lower() in self.BINARY_EXTENSIONS:
+            return False
+
+        # Detect binary files by content: a NUL byte in the first 8 KB
+        try:
+            with open(path, 'rb') as f:
+                return b'\x00' not in f.read(8192)
+        except OSError as e:
+            # Say so instead of skipping silently: an unreadable file is an
+            # unscanned file.
+            print(f"Warning: Could not read {path}: {e}", file=sys.stderr)
+            return False
+
+    def _scan_file(self, file_path: Path):
+        """Match every pattern against one file and record the findings.
+
+        A file that cannot be read is reported on stderr and skipped.
+        """
+        try:
+            # Decode explicitly and leniently. With the locale default and
+            # strict errors, a single undecodable byte made the whole file
+            # unscannable - a trivial way to hide a payload from the scanner.
+            content = file_path.read_text(encoding='utf-8', errors='replace')
+        except OSError as e:
+            print(f"Warning: Could not scan {file_path}: {e}", file=sys.stderr)
+            return
+
+        if file_path == self.skill_path:
+            # Single-file scan: relative_to() would yield '.', so use the name.
+            display_path = file_path.name
+        else:
+            display_path = str(file_path.relative_to(self.skill_path))
+
+        flags = re.IGNORECASE | re.MULTILINE
+        for category, patterns in self.PATTERNS.items():
+            for pattern, description, severity in patterns:
+                # finditer() yields matches in increasing start order, so the
+                # line number can be advanced incrementally instead of
+                # recounting newlines from the start of the file every time.
+                line_num = 1
+                counted_to = 0
+                for match in re.finditer(pattern, content, flags):
+                    line_num += content.count('\n', counted_to, match.start())
+                    counted_to = match.start()
+                    self.findings.append({
+                        'file': display_path,
+                        'line': line_num,
+                        'category': category,
+                        'severity': severity,
+                        'description': description,
+                        'match': match.group(0)[:50],  # truncate long matches
+                    })
+
+    def print_report(self, format='text'):
+        """Print the findings to stdout.
+
+        ``format='json'`` prints one JSON object with ``total_findings``,
+        ``findings`` and ``clean``. Any other value prints the text report:
+        a summary line, per-severity counts, then the findings grouped by
+        severity and category.
+        """
+        if format == 'json':
+            output = {
+                'total_findings': len(self.findings),
+                'findings': self.findings,
+                'clean': not self.findings,
+            }
+            print(json.dumps(output, indent=2))
+            return
+
+        # Text format (default)
+        if not self.findings:
+            print("✅ No security issues detected")
+            return
+
+        # ANSI color codes
+        colors = {
+            'CRITICAL': '\033[91m',  # Red
+            'HIGH': '\033[93m',      # Yellow
+            'MEDIUM': '\033[94m',    # Blue
+            'LOW': '\033[96m',       # Cyan
+            'INFO': '\033[97m',      # White
+            'RESET': '\033[0m'
+        }
+
+        # One pass: count per severity and group by severity, then category
+        severity_counts = {}
+        by_severity = {}
+        for finding in self.findings:
+            sev = finding['severity']
+            severity_counts[sev] = severity_counts.get(sev, 0) + 1
+            by_severity.setdefault(sev, {}).setdefault(finding['category'], []).append(finding)
+
+        print(f"⚠️  Found {len(self.findings)} potential security issues:\n")
+        counts_str = ', '.join(f"{sev}: {count}" for sev, count in sorted(severity_counts.items()))
+        print(f"   {counts_str}\n")
+
+        # Print in severity order
+        reset = colors['RESET']
+        for severity in self.SEVERITY_ORDER:
+            if severity not in by_severity:
+                continue
+
+            color = colors.get(severity, '')
+            for category, findings in sorted(by_severity[severity].items()):
+                print(f"{color}🔍 {severity}{reset} - {category.upper().replace('_', ' ')}")
+                for f in findings:
+                    print(f"   {f['file']}:{f['line']} - {f['description']}")
+                    print(f"      Match: {f['match']}")
+                print()
+
+
+def main():
+    """Command-line entry point: scan a path, print the report, exit.
+
+    Takes the path to scan as the positional ``path`` argument and an
+    optional ``--format`` of ``text`` (default) or ``json``. The process
+    exits with the status returned by ``SkillScanner.scan()``.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(description='Security scanner for ClawHub skills')
+    parser.add_argument('path', help='Skill directory to scan')
+    parser.add_argument('--format', choices=['text', 'json'], default='text',
+                        help='Output format (default: text)')
+
+    args = parser.parse_args()
+
+    scanner = SkillScanner(args.path)
+    findings, exit_code = scanner.scan()
+
+    # scan() signals a failed scan (path not found) as "no findings, non-zero
+    # status" after writing the error to stderr. Printing the report in that
+    # case would claim the skill is clean, so exit without one.
+    if exit_code != 0 and not findings:
+        sys.exit(exit_code)
+
+    scanner.print_report(format=args.format)
+
+    sys.exit(exit_code)
+
+
+if __name__ == '__main__':
+    main()