commit 790d68479a63cbd5b78d0d873c53e92b9c6844b1
Author: zlei9 <zlei9@126.com>
Date:   Sun Mar 29 13:04:19 2026 +0800

    Initial commit with translated description

diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..1dcf6e5
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,48 @@
+---
+name: youtube-watcher
+description: "从YouTube视频获取和阅读转录。"
+author: michael gathara
+version: 1.0.0
+triggers:
+  - "watch youtube"
+  - "summarize video"
+  - "video transcript"
+  - "youtube summary"
+  - "analyze video"
+metadata: {"clawdbot":{"emoji":"📺","requires":{"bins":["yt-dlp"]},"install":[{"id":"brew","kind":"brew","formula":"yt-dlp","bins":["yt-dlp"],"label":"Install yt-dlp (brew)"},{"id":"pip","kind":"pip","package":"yt-dlp","bins":["yt-dlp"],"label":"Install yt-dlp (pip)"}]}}
+---
+
+# YouTube Watcher
+
+Fetch transcripts from YouTube videos to enable summarization, question answering (Q&A), and content extraction.
+
+## Usage
+
+### Get Transcript
+
+Retrieve the text transcript of a video.
+
+```bash
+python3 {baseDir}/scripts/get_transcript.py "https://www.youtube.com/watch?v=VIDEO_ID"
+```
+
+## Examples
+
+**Summarize a video:**
+
+1. Get the transcript:
+   ```bash
+   python3 {baseDir}/scripts/get_transcript.py "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+   ```
+2. Read the output and summarize it for the user.
+
+**Find specific information:**
+
+1. Get the transcript.
+2. Search the text for keywords or answer the user's question based on the content.
+
+## Notes
+
+- Requires `yt-dlp` to be installed and available in the PATH.
+- Works with videos that have English closed captions (CC) or auto-generated English subtitles; the script requests only the `en` subtitle language.
+- If a video has no subtitles, the script prints `No subtitles found.` to stderr and exits with status 1.
diff --git a/_meta.json b/_meta.json
new file mode 100644
index 0000000..7793044
--- /dev/null
+++ b/_meta.json
@@ -0,0 +1,6 @@
+{
+  "ownerId": "kn70xwv21y5gnsycry0shkye6d7zx8jz",
+  "slug": "youtube-watcher",
+  "version": "1.0.0",
+  "publishedAt": 1769327779866
+}
\ No newline at end of file
diff --git a/scripts/get_transcript.py b/scripts/get_transcript.py
new file mode 100644
index 0000000..92860e0
--- /dev/null
+++ b/scripts/get_transcript.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+def clean_vtt(content: str) -> str:
+    """
+    Clean WebVTT content to plain text.
+
+    Removes the file header (the WEBVTT line and any metadata lines that
+    follow it), NOTE/STYLE/REGION blocks, cue identifiers, cue timing lines
+    and inline tags such as <c> or <00:00:01.000>. Character references
+    (e.g. &amp;) are decoded, and a line identical to the previously kept
+    line is dropped, which collapses rolling captions that repeat text.
+
+    Args:
+        content: Full text of a .vtt file.
+
+    Returns:
+        The caption text, one kept line per output line, joined with '\n'.
+        An empty string is returned when no caption text is found.
+    """
+    import html  # local import: the module-level import block is left untouched
+
+    # The hours field is optional in WebVTT ("mm:ss.ttt" is a valid timestamp).
+    timestamp_pattern = re.compile(
+        r'(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s+-->\s+(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}'
+    )
+    tag_pattern = re.compile(r'<[^>]+>')
+    skipped_blocks = ('WEBVTT', 'NOTE', 'STYLE', 'REGION')
+
+    def starts_skipped_block(text: str) -> bool:
+        # Match the keyword as a whole word so that text such as "NOTEWORTHY"
+        # is not mistaken for a NOTE block.
+        return any(
+            text == keyword or text.startswith((keyword + ' ', keyword + '\t'))
+            for keyword in skipped_blocks
+        )
+
+    raw_lines = content.lstrip('\ufeff').splitlines()
+    text_lines = []
+    section = None  # None: between blocks, 'skip': header/NOTE/STYLE/REGION, 'cue': cue text
+
+    for index, raw in enumerate(raw_lines):
+        line = raw.strip()
+        if not line:
+            # Only a truly empty line ends a block; whitespace-only lines
+            # can occur inside a cue and must not reset the state.
+            if not raw:
+                section = None
+            continue
+        if timestamp_pattern.match(line):
+            section = 'cue'
+            continue
+        if section == 'skip':
+            continue
+
+        followed_by_timing = (
+            index + 1 < len(raw_lines)
+            and timestamp_pattern.match(raw_lines[index + 1].strip()) is not None
+        )
+        if section == 'cue':
+            # A numeric line directly above a timing line is the next cue's
+            # identifier (blank separator missing); other digits are real text.
+            if line.isdigit() and followed_by_timing:
+                continue
+        else:
+            if starts_skipped_block(line):
+                section = 'skip'
+                continue
+            if line.isdigit() or followed_by_timing:
+                continue  # cue identifier
+
+        # Strip tags before the duplicate check so the comparison is made on
+        # the text that is actually emitted.
+        line = html.unescape(tag_pattern.sub('', line)).strip()
+        if not line:
+            continue
+        if text_lines and text_lines[-1] == line:
+            continue
+
+        text_lines.append(line)
+
+    return '\n'.join(text_lines)
+
+def get_transcript(url: str) -> None:
+    """
+    Download the English subtitles of a video with yt-dlp and print them as plain text.
+
+    yt-dlp is run inside a temporary directory with the media download
+    skipped. The subtitle file it writes is read, passed through
+    clean_vtt() and printed to stdout.
+
+    Args:
+        url: Video URL handed to yt-dlp.
+
+    Exits the process with status 1, after printing a message to stderr,
+    when yt-dlp is not installed, yt-dlp fails, or no .vtt file was written.
+    """
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cmd = [
+            "yt-dlp",
+            "--write-subs",
+            "--write-auto-subs",
+            "--skip-download",
+            # A watch URL that also carries a playlist should yield one video only.
+            "--no-playlist",
+            "--sub-lang", "en",
+            # Ask for WebVTT explicitly, since only *.vtt files are read below.
+            "--sub-format", "vtt",
+            "--output", "subs",
+            # End of options: a URL beginning with "-" must not be parsed as a flag.
+            "--",
+            url,
+        ]
+
+        try:
+            subprocess.run(cmd, cwd=temp_dir, check=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            # stderr is raw bytes; an undecodable byte must not mask the real error.
+            details = e.stderr.decode("utf-8", errors="replace")
+            print(f"Error running yt-dlp: {details}", file=sys.stderr)
+            sys.exit(1)
+        except FileNotFoundError:
+            print("Error: yt-dlp not found. Please install it.", file=sys.stderr)
+            sys.exit(1)
+
+        # glob() order is unspecified; sort so the chosen file is deterministic.
+        vtt_files = sorted(Path(temp_dir).glob("*.vtt"))
+
+        if not vtt_files:
+            print("No subtitles found.", file=sys.stderr)
+            sys.exit(1)
+
+        content = vtt_files[0].read_text(encoding='utf-8')
+        print(clean_vtt(content))
+
+def main():
+    """Command-line entry point: parse the URL argument and print that video's transcript."""
+    cli = argparse.ArgumentParser(description="Fetch YouTube transcript.")
+    cli.add_argument("url", help="YouTube video URL")
+    get_transcript(cli.parse_args().url)
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()