commit 790d68479a63cbd5b78d0d873c53e92b9c6844b1 Author: zlei9 Date: Sun Mar 29 13:04:19 2026 +0800 Initial commit with translated description diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..1dcf6e5 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,48 @@ +--- +name: youtube-watcher +description: "从YouTube视频获取和阅读转录。" +author: michael gathara +version: 1.0.0 +triggers: + - "watch youtube" + - "summarize video" + - "video transcript" + - "youtube summary" + - "analyze video" +metadata: {"clawdbot":{"emoji":"📺","requires":{"bins":["yt-dlp"]},"install":[{"id":"brew","kind":"brew","formula":"yt-dlp","bins":["yt-dlp"],"label":"Install yt-dlp (brew)"},{"id":"pip","kind":"pip","package":"yt-dlp","bins":["yt-dlp"],"label":"Install yt-dlp (pip)"}]}} +--- + +# YouTube Watcher + +Fetch transcripts from YouTube videos to enable summarization, QA, and content extraction. + +## Usage + +### Get Transcript + +Retrieve the text transcript of a video. + +```bash +python3 {baseDir}/scripts/get_transcript.py "https://www.youtube.com/watch?v=VIDEO_ID" +``` + +## Examples + +**Summarize a video:** + +1. Get the transcript: + ```bash + python3 {baseDir}/scripts/get_transcript.py "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + ``` +2. Read the output and summarize it for the user. + +**Find specific information:** + +1. Get the transcript. +2. Search the text for keywords or answer the user's question based on the content. + +## Notes + +- Requires `yt-dlp` to be installed and available in the PATH. +- Works with videos that have English closed captions (CC) or auto-generated English subtitles; the script requests the `en` subtitle track. +- If no subtitle file can be downloaded for a video, the script prints `No subtitles found.` to stderr and exits with status 1. 
# Preserved from the commit dump this script was embedded in (not part of the
# script itself):
#
#   diff --git a/_meta.json b/_meta.json
#   new file mode 100644
#   index 0000000..7793044
#   --- /dev/null
#   +++ b/_meta.json
#   @@ -0,0 +1,6 @@
#   +{
#   +  "ownerId": "kn70xwv21y5gnsycry0shkye6d7zx8jz",
#   +  "slug": "youtube-watcher",
#   +  "version": "1.0.0",
#   +  "publishedAt": 1769327779866
#   +}
#   \ No newline at end of file
#
#   diff --git a/scripts/get_transcript.py b/scripts/get_transcript.py
#   new file mode 100644
#   index 0000000..92860e0
#   --- /dev/null
#   +++ b/scripts/get_transcript.py
#   @@ -0,0 +1,81 @@
#
#!/usr/bin/env python3
"""Download a video's subtitles with yt-dlp and print them as plain text."""
import argparse
import html
import os
import re
import subprocess
import sys
import tempfile
from pathlib import Path

# Cue timing line, e.g. "00:00:01.000 --> 00:00:04.000 align:start".
# The hours field is optional in WebVTT; a comma separator is tolerated so
# SRT-style timings are recognised as well.
_TIMING_RE = re.compile(
    r'(?:\d+:)?\d{2}:\d{2}[.,]\d{3}\s+-->\s+(?:\d+:)?\d{2}:\d{2}[.,]\d{3}'
)

# Inline markup such as <c>, </c>, <i> or <00:00:01.500> word timestamps.
_TAG_RE = re.compile(r'<[^>]+>')

# First token of blocks that carry no caption text. Each such block runs
# until the next blank line.
_SKIPPED_BLOCK_KEYWORDS = frozenset({'WEBVTT', 'NOTE', 'STYLE', 'REGION'})


def _opens_skipped_block(line: str) -> bool:
    """Return True if *line* (stripped, non-empty) starts a non-caption block."""
    return line.split(None, 1)[0] in _SKIPPED_BLOCK_KEYWORDS


def clean_vtt(content: str) -> str:
    """
    Clean WebVTT content to plain text.

    Drops the file header block (including metadata lines that follow
    ``WEBVTT``), ``NOTE``/``STYLE``/``REGION`` blocks, cue identifiers and
    timing lines. Inline tags are removed, character references such as
    ``&amp;`` are decoded, and consecutive duplicate lines are collapsed.

    Args:
        content: Raw text of a ``.vtt`` file.

    Returns:
        The caption text, one line per retained caption line, joined with
        ``"\\n"``. Empty input yields an empty string.
    """
    raw_lines = content.lstrip('\ufeff').splitlines()
    text_lines = []
    skipping = False  # inside a header / NOTE / STYLE / REGION block
    in_cue = False    # a timing line was seen and its payload is not finished

    for index, raw in enumerate(raw_lines):
        line = raw.strip()

        if not line:
            skipping = False
            # Only a truly empty line ends a cue: whitespace-only lines can
            # appear inside a cue payload and must not reset the state.
            if not raw:
                in_cue = False
            continue
        if skipping:
            continue

        if _TIMING_RE.match(line):
            in_cue = True
            continue

        if not in_cue:
            # Block keywords and cue identifiers are only meaningful outside
            # a payload; inside one, "NOTE ..." or "42" is ordinary text.
            if _opens_skipped_block(line):
                skipping = True
                continue
            following = (
                raw_lines[index + 1].strip()
                if index + 1 < len(raw_lines) else ''
            )
            if _TIMING_RE.match(following):
                continue  # cue identifier directly above its timing line

        # Strip tags *before* comparing with the previous line, otherwise a
        # tagged repeat of the same caption is never detected as a duplicate.
        text = html.unescape(_TAG_RE.sub('', line)).strip()
        if not text:
            continue
        if text_lines and text_lines[-1] == text:
            continue

        text_lines.append(text)

    return '\n'.join(text_lines)


def get_transcript(url: str, lang: str = "en") -> None:
    """
    Fetch the subtitles for *url* with yt-dlp and print them as plain text.

    Subtitles are downloaded into a temporary directory that is removed
    before the function returns.

    Args:
        url: Video URL handed to yt-dlp.
        lang: Subtitle language selector passed to ``--sub-lang``.

    Raises:
        SystemExit: With status 1 if yt-dlp is missing, yt-dlp exits with an
            error, or no ``.vtt`` file was produced. A message is written to
            stderr first.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        cmd = [
            "yt-dlp",
            "--write-subs",
            "--write-auto-subs",
            "--skip-download",
            # A watch URL that also names a playlist should yield one video.
            "--no-playlist",
            "--sub-lang", lang,
            # Prefer the format the glob below looks for.
            "--sub-format", "vtt/best",
            "--output", "subs",
            # End of options: a URL starting with "-" must not be parsed as
            # a yt-dlp flag.
            "--",
            url,
        ]

        try:
            subprocess.run(cmd, cwd=temp_dir, check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            # stderr is bytes here; never let a decoding problem mask the
            # real failure.
            stderr = (e.stderr or b"").decode("utf-8", errors="replace")
            print(f"Error running yt-dlp: {stderr}", file=sys.stderr)
            sys.exit(1)
        except FileNotFoundError:
            print("Error: yt-dlp not found. Please install it.", file=sys.stderr)
            sys.exit(1)

        # Sorted so the choice is deterministic if several files were written.
        vtt_files = sorted(Path(temp_dir).glob("*.vtt"))

        if not vtt_files:
            print("No subtitles found.", file=sys.stderr)
            sys.exit(1)

        content = vtt_files[0].read_text(encoding='utf-8')
        print(clean_vtt(content))


def main() -> None:
    """Parse command-line arguments and print the requested transcript."""
    parser = argparse.ArgumentParser(description="Fetch YouTube transcript.")
    parser.add_argument("url", help="YouTube video URL")
    parser.add_argument(
        "--lang",
        default="en",
        help="Subtitle language to request (default: en)",
    )
    args = parser.parse_args()

    get_transcript(args.url, lang=args.lang)


if __name__ == "__main__":
    main()