From e8664ee527b35142cd1d42499b08c1d174c2d764 Mon Sep 17 00:00:00 2001
From: zlei9 <zlei9@126.com>
Date: Sun, 29 Mar 2026 14:29:10 +0800
Subject: [PATCH] Initial commit with translated description

---
 SKILL.md          |  82 +++++++++
 _meta.json        |   6 +
 package-lock.json |  24 +++
 package.json      |  10 ++
 scripts/vtd.js    | 435 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 557 insertions(+)
 create mode 100644 SKILL.md
 create mode 100644 _meta.json
 create mode 100644 package-lock.json
 create mode 100644 package.json
 create mode 100644 scripts/vtd.js

diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..df38102
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,82 @@
+---
+name: video-transcript-downloader
+description: "从YouTube和任何其他yt-dlp支持的站点下载视频、音频、字幕和干净的段落式转录。"
+---
+
+# Video Transcript Downloader
+
+`./scripts/vtd.js` can:
+- Print a transcript as a clean paragraph (timestamps optional).
+- Download video/audio/subtitles.
+
+Transcript behavior:
+- YouTube: fetch via `youtube-transcript-plus` when possible.
+- Otherwise: pull subtitles via `yt-dlp`, then clean into a paragraph.
+
+## Setup
+
+```bash
+cd ~/Projects/agent-scripts/skills/video-transcript-downloader && npm ci
+```
+
+## Transcript (default: clean paragraph)
+
+```bash
+./scripts/vtd.js transcript --url 'https://…'
+./scripts/vtd.js transcript --url 'https://…' --lang en
+./scripts/vtd.js transcript --url 'https://…' --timestamps
+./scripts/vtd.js transcript --url 'https://…' --keep-brackets
+```
+
+## Download video / audio / subtitles
+
+```bash
+./scripts/vtd.js download --url 'https://…' --output-dir ~/Downloads
+./scripts/vtd.js audio --url 'https://…' --output-dir ~/Downloads
+./scripts/vtd.js subs --url 'https://…' --output-dir ~/Downloads --lang en
+```
+
+## Formats (list + choose)
+
+List available formats (format ids, resolution, container, audio-only, etc.):
+
+```bash
+./scripts/vtd.js formats --url 'https://…'
+```
+
+Download a specific format id (example):
+
+```bash
+./scripts/vtd.js download --url 'https://…' --output-dir ~/Downloads -- --format 137+140
+```
+
+Prefer MP4 container without re-encoding (remux when possible):
+
+```bash
+./scripts/vtd.js download --url 'https://…' --output-dir ~/Downloads -- --remux-video mp4
+```
+
+## Notes
+
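+- Default transcript output is a single paragraph. Use `--timestamps` only when asked; it only affects the direct YouTube fetch — the `yt-dlp` subtitle fallback always prints a paragraph.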
+- Bracketed cues like `[Music]` are stripped by default; keep them via `--keep-brackets`.
+- Pass extra `yt-dlp` args after `--` for `transcript` fallback, `download`, `audio`, `subs`, `formats`.
+
+```bash
+./scripts/vtd.js formats --url 'https://…' -- -v
+```
+
+## Troubleshooting (only when needed)
+
+- Missing `yt-dlp` / `ffmpeg`:
+
+```bash
+brew install yt-dlp ffmpeg
+```
+
+- Verify:
+
+```bash
+yt-dlp --version
+ffmpeg -version | head -n 1
+```
diff --git a/_meta.json b/_meta.json
new file mode 100644
index 0000000..204e2e7
--- /dev/null
+++ b/_meta.json
@@ -0,0 +1,6 @@
+{
+  "ownerId": "kn70pywhg0fyz996kpa8xj89s57yhv26",
+  "slug": "video-transcript-downloader",
+  "version": "1.0.0",
+  "publishedAt": 1767652029078
+}
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..b2a563f
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,24 @@
+{
+  "name": "video-transcript-downloader",
+  "version": "1.0.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "video-transcript-downloader",
+      "version": "1.0.0",
+      "dependencies": {
+        "youtube-transcript-plus": "^1.1.1"
+      }
+    },
+    "node_modules/youtube-transcript-plus": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/youtube-transcript-plus/-/youtube-transcript-plus-1.1.1.tgz",
+      "integrity": "sha512-KmAcn7eBPMgSnPJosCaaRp6/eqOAXk9p5f5ExBf+aD0M74Hud5RzbwHVoFpR5wOS++QqrrFaFhq9eiaWqWDlXg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    }
+  }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..8ab2044
--- /dev/null
+++ b/package.json
@@ -0,0 +1,10 @@
+{
+  "name": "video-transcript-downloader",
+  "version": "1.0.0",
+  "private": true,
+  "type": "module",
+  "dependencies": {
+    "youtube-transcript-plus": "^1.1.1"
+  }
+}
+
diff --git a/scripts/vtd.js b/scripts/vtd.js
new file mode 100644
index 0000000..4d73777
--- /dev/null
+++ b/scripts/vtd.js
@@ -0,0 +1,435 @@
+#!/usr/bin/env node
+import { spawn } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { YoutubeTranscript } from "youtube-transcript-plus";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+function die(message, code = 1) { // Fatal-error helper: report `message` on stderr, then terminate.
+  process.stderr.write(String(message).trimEnd() + "\n"); // trailing whitespace trimmed, one "\n" appended
+  process.exit(code); // exit status defaults to 1
+}
+
+function parseArgs(argv) {
+  // Tiny no-deps parser.
+  // - `--flag` => boolean
+  // - `--key value`
+  // - `--` => forward remaining args to yt-dlp
+  const positional = [];
+  const opts = {};
+  let i = 0;
+  while (i < argv.length) {
+    const a = argv[i];
+    if (a === "--") {
+      opts.extra = argv.slice(i + 1);
+      break;
+    }
+    if (!a.startsWith("--")) {
+      positional.push(a);
+      i += 1;
+      continue;
+    }
+    const key = a.slice(2);
+    const next = argv[i + 1];
+    const isValue = next !== undefined && !next.startsWith("--");
+    if (!isValue) {
+      opts[key] = true;
+      i += 1;
+      continue;
+    }
+    if (opts[key] === undefined) opts[key] = next;
+    else if (Array.isArray(opts[key])) opts[key].push(next);
+    else opts[key] = [opts[key], next];
+    i += 2;
+  }
+  return { positional, opts };
+}
+
+function toArray(v) {
+  if (v === undefined) return [];
+  if (Array.isArray(v)) return v;
+  return [v];
+}
+
+function which(cmd) {
+  // Avoid shelling out to `which`; keep it portable + fast.
+  const envPath = process.env.PATH || "";
+  const parts = envPath.split(path.delimiter);
+  for (const p of parts) {
+    const full = path.join(p, cmd);
+    if (fs.existsSync(full)) return full;
+  }
+  return null;
+}
+
+function resolveBin(name, fallback) { // Resolve a binary: PATH lookup first, then an optional fixed fallback path.
+  return which(name) || (fallback && fs.existsSync(fallback) ? fallback : null); // null when neither is found
+}
+
+function run(cmd, args, { cwd } = {}) {
+  return new Promise((resolve) => {
+    // Capture stdout + stderr to keep yt-dlp’s error context intact.
+    const child = spawn(cmd, args, { cwd, stdio: ["ignore", "pipe", "pipe"] });
+    let out = "";
+    child.stdout.on("data", (d) => (out += d.toString()));
+    child.stderr.on("data", (d) => (out += d.toString()));
+    child.on("close", (code) => resolve({ code, out }));
+  });
+}
+
+function isYouTubeUrl(url) {
+  return /(^https?:\/\/)?(www\.)?(youtube\.com|youtu\.be)\//i.test(url);
+}
+
+function extractYouTubeId(input) {
+  if (!input) return null;
+  const raw = String(input).trim();
+  if (/^[a-zA-Z0-9_-]{11}$/.test(raw)) return raw;
+  const m = raw.match(/(?:v=|youtu\.be\/)([a-zA-Z0-9_-]{11})/);
+  return m ? m[1] : null;
+}
+
+function decodeHtmlEntities(input) {
+  if (!input) return input;
+  // Some transcripts come back double-encoded (e.g. "&amp;#39;").
+  // Decode up to 2 passes; stop once stable.
+  let text = input;
+  for (let i = 0; i < 2; i++) {
+    const decoded = text
+      .replace(/&amp;/g, "&")
+      .replace(/&lt;/g, "<")
+      .replace(/&gt;/g, ">")
+      .replace(/&quot;/g, '"')
+      .replace(/&apos;/g, "'")
+      .replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number(dec)))
+      .replace(/&#x([0-9a-fA-F]+);/g, (_, hex) => String.fromCodePoint(parseInt(hex, 16)));
+    if (decoded === text) break;
+    text = decoded;
+  }
+  return text;
+}
+
+function formatTimestamp(seconds) {
+  const s = Math.max(0, Math.floor(seconds));
+  const h = Math.floor(s / 3600);
+  const m = Math.floor((s % 3600) / 60);
+  const sec = Math.floor(s % 60);
+  if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${String(sec).padStart(2, "0")}`;
+  return `${m}:${String(sec).padStart(2, "0")}`;
+}
+
+function cleanSegments(segments, { keepBrackets } = {}) {
+  const cleaned = [];
+  let prev = "";
+
+  for (const seg of segments) {
+    const s = String(seg || "")
+      .replace(/\s+/g, " ")
+      .trim();
+    if (!s) continue;
+
+    // Subtitles often contain HTML-ish tags; strip them.
+    const withoutTags = s.replace(/<[^>]+>/g, "").trim();
+    const withoutBrackets = keepBrackets ? withoutTags : withoutTags.replace(/\[[^\]]*\]/g, "").trim();
+    const withoutCurlies = withoutBrackets.replace(/\{[^}]+\}/g, "").replace(/♪/g, "").trim();
+    const t = withoutCurlies.replace(/\s+/g, " ").trim();
+    if (!t) continue;
+    if (t === prev) continue;
+    // Dedup heuristic: captions often repeat previous line with a longer suffix.
+    if (prev && t.startsWith(prev)) {
+      const newPart = t.slice(prev.length).trim();
+      if (newPart) cleaned.push(newPart);
+    } else if (prev && t.includes(prev)) {
+      // Another common pattern: current line contains previous line in the middle.
+      const idx = t.indexOf(prev);
+      const newPart = (t.slice(0, idx) + t.slice(idx + prev.length)).trim();
+      if (newPart) cleaned.push(newPart);
+    } else {
+      cleaned.push(t);
+    }
+    prev = t;
+  }
+
+  return cleaned;
+}
+
+function toParagraph(segments, { keepBrackets } = {}) {
+  const cleaned = cleanSegments(segments, { keepBrackets });
+  return cleaned.join(" ").replace(/\s+/g, " ").trim();
+}
+
+function parseSrt(text) {
+  const lines = String(text).split(/\r?\n/);
+  const segments = [];
+  for (const line of lines) {
+    const l = line.trim();
+    if (!l) continue;
+    if (/^\d+$/.test(l)) continue;
+    if (l.includes("-->")) continue;
+    segments.push(l);
+  }
+  return segments;
+}
+
+function parseVtt(text) {
+  const lines = String(text).split(/\r?\n/);
+  const segments = [];
+  for (const line of lines) {
+    const l = line.trim();
+    if (!l) continue;
+    if (l === "WEBVTT") continue;
+    if (l.startsWith("Kind:") || l.startsWith("Language:")) continue;
+    if (l.includes("-->")) continue;
+    // cue settings like "align:start position:0%"
+    if (/^(align|position|size|line):/i.test(l)) continue;
+    // Remove inline timestamps like "<00:00:00.000>" (common in YouTube VTT).
+    const cleaned = l.replace(/<\d{2}:\d{2}:\d{2}\.\d{3}>/g, "").trim();
+    if (cleaned) segments.push(cleaned);
+  }
+  return segments;
+}
+
+async function ytDlpSubtitlesToTemp({ url, lang, ytdlpPath, extra }) {
+  const ytdlp = ytdlpPath || resolveBin("yt-dlp", "/opt/homebrew/bin/yt-dlp");
+  if (!ytdlp) die("missing yt-dlp; install `yt-dlp` and ensure it is on PATH");
+
+  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "vtd-subs-"));
+  const outTemplate = path.join(tmpDir, "%(id)s.%(ext)s");
+
+  const args = [];
+  args.push(
+    "--write-sub",
+    "--write-auto-sub",
+    "--skip-download",
+    "--sub-lang",
+    lang,
+    "-o",
+    outTemplate,
+  );
+  if (extra?.length) args.push(...extra);
+  args.push(url);
+
+  const r = await run(ytdlp, args);
+  if (r.code !== 0) {
+    fs.rmSync(tmpDir, { recursive: true, force: true });
+    die(r.out.trim() || "yt-dlp subtitle download failed");
+  }
+
+  const files = fs
+    .readdirSync(tmpDir)
+    .map((f) => path.join(tmpDir, f))
+    .filter((f) => /\.(vtt|srt|ass|ttml)$/i.test(f))
+    .sort((a, b) => fs.statSync(b).mtimeMs - fs.statSync(a).mtimeMs);
+
+  if (files.length === 0) {
+    fs.rmSync(tmpDir, { recursive: true, force: true });
+    die(`no subtitles found (lang=${lang})`);
+  }
+
+  return { tmpDir, subtitlePath: files[0] };
+}
+
+async function cmdTranscript({ url, lang, timestamps, keepBrackets, extra }) {
+  if (!url) die("missing --url");
+
+  if (isYouTubeUrl(url)) {
+    const id = extractYouTubeId(url);
+    if (id) {
+      try {
+        // Preferred path: direct transcript fetch (no yt-dlp / no files).
+        const transcript = await YoutubeTranscript.fetchTranscript(id);
+        if (timestamps) {
+          for (const entry of transcript) {
+            const ts = formatTimestamp(entry.offset / 1000);
+            process.stdout.write(`[${ts}] ${decodeHtmlEntities(entry.text).replace(/\s+/g, " ").trim()}\n`);
+          }
+          return;
+        }
+        const paragraph = toParagraph(transcript.map((e) => decodeHtmlEntities(e.text)), { keepBrackets });
+        if (!paragraph) die("empty transcript");
+        process.stdout.write(paragraph + "\n");
+        return;
+      } catch {
+        // Fallback below: use yt-dlp subtitles when direct transcript fails.
+      }
+    }
+  }
+
+  const { tmpDir, subtitlePath } = await ytDlpSubtitlesToTemp({
+    url,
+    lang,
+    extra,
+  });
+
+  try {
+    const raw = fs.readFileSync(subtitlePath, "utf8");
+    const segments = subtitlePath.endsWith(".srt") ? parseSrt(raw) : parseVtt(raw);
+    if (timestamps) {
+      // Subtitle timestamps are inconsistent across sites; keep output stable here.
+      const paragraph = toParagraph(segments, { keepBrackets });
+      process.stdout.write(paragraph + "\n");
+      return;
+    }
+    const paragraph = toParagraph(segments, { keepBrackets });
+    if (!paragraph) die("empty transcript from subtitles");
+    process.stdout.write(paragraph + "\n");
+  } finally {
+    fs.rmSync(tmpDir, { recursive: true, force: true });
+  }
+}
+
+async function cmdSubs({ url, lang, outputDir, extra }) {
+  if (!url) die("missing --url");
+
+  const { tmpDir, subtitlePath } = await ytDlpSubtitlesToTemp({
+    url,
+    lang,
+    extra,
+  });
+
+  try {
+    const out = path.resolve(outputDir);
+    fs.mkdirSync(out, { recursive: true });
+    const dest = path.join(out, path.basename(subtitlePath));
+    fs.copyFileSync(subtitlePath, dest);
+    process.stdout.write(dest + "\n");
+  } finally {
+    fs.rmSync(tmpDir, { recursive: true, force: true });
+  }
+}
+
+async function cmdDownload({ url, outputDir, extra }) {
+  if (!url) die("missing --url");
+  const ytdlp = resolveBin("yt-dlp", "/opt/homebrew/bin/yt-dlp");
+  if (!ytdlp) die("missing yt-dlp; install `yt-dlp` and ensure it is on PATH");
+
+  const out = path.resolve(outputDir);
+  fs.mkdirSync(out, { recursive: true });
+
+  const args = [];
+
+  // `--print after_move:filepath` gives the final path after merges/remux.
+  args.push("-P", out, "-o", "%(title).200B (%(id)s).%(ext)s", "-S", "res,ext:mp4:m4a,tbr", "--print", "after_move:filepath");
+  if (extra?.length) args.push(...extra);
+  args.push(url);
+
+  const r = await run(ytdlp, args);
+  if (r.code !== 0) die(r.out.trim() || "yt-dlp download failed");
+
+  const lines = r.out.split("\n").map((l) => l.trim());
+  const filePath = lines.find((l) => l.startsWith("/") && fs.existsSync(l));
+  if (!filePath) die(r.out.trim() || "could not determine downloaded file path");
+  process.stdout.write(path.resolve(filePath) + "\n");
+}
+
+async function cmdAudio({ url, outputDir, extra }) {
+  if (!url) die("missing --url");
+  const ytdlp = resolveBin("yt-dlp", "/opt/homebrew/bin/yt-dlp");
+  if (!ytdlp) die("missing yt-dlp; install `yt-dlp` and ensure it is on PATH");
+  const ffmpeg = resolveBin("ffmpeg", "/opt/homebrew/bin/ffmpeg");
+  if (!ffmpeg) die("missing ffmpeg; install `ffmpeg` (needed for audio extraction)");
+
+  const out = path.resolve(outputDir);
+  fs.mkdirSync(out, { recursive: true });
+
+  const args = [];
+
+  args.push(
+    "--ffmpeg-location",
+    ffmpeg,
+    "-P",
+    out,
+    "-o",
+    "%(title).200B (%(id)s).%(ext)s",
+    "-x",
+    "--audio-format",
+    "mp3",
+    "--print",
+    "after_move:filepath",
+  );
+  if (extra?.length) args.push(...extra);
+  args.push(url);
+
+  const r = await run(ytdlp, args);
+  if (r.code !== 0) die(r.out.trim() || "yt-dlp audio failed");
+
+  const lines = r.out.split("\n").map((l) => l.trim());
+  const filePath = lines.find((l) => l.startsWith("/") && fs.existsSync(l));
+  if (!filePath) die(r.out.trim() || "could not determine downloaded file path");
+  process.stdout.write(path.resolve(filePath) + "\n");
+}
+
+async function cmdFormats({ url, extra }) {
+  if (!url) die("missing --url");
+  const ytdlp = resolveBin("yt-dlp", "/opt/homebrew/bin/yt-dlp");
+  if (!ytdlp) die("missing yt-dlp; install `yt-dlp` and ensure it is on PATH");
+
+  // Print raw yt-dlp format table; user picks `--format <id>` for downloads.
+  const args = ["-F"];
+  if (extra?.length) args.push(...extra);
+  args.push(url);
+
+  const r = await run(ytdlp, args);
+  if (r.code !== 0) die(r.out.trim() || "yt-dlp formats failed");
+  process.stdout.write(r.out);
+}
+
+function usage() {
+  const rel = path.relative(process.cwd(), path.join(__dirname, "vtd.js"));
+  return [
+    "usage:",
+    `  ${rel} transcript --url 'https://…' [--lang en] [--timestamps] [--keep-brackets] [-- <yt-dlp extra…>]`,
+    `  ${rel} download   --url 'https://…' [--output-dir ~/Downloads] [-- <yt-dlp extra…>]`,
+    `  ${rel} audio      --url 'https://…' [--output-dir ~/Downloads] [-- <yt-dlp extra…>]`,
+    `  ${rel} subs       --url 'https://…' [--output-dir ~/Downloads] [--lang en] [-- <yt-dlp extra…>]`,
+    `  ${rel} formats    --url 'https://…' [-- <yt-dlp extra…>]`,
+  ].join("\n");
+}
+
+async function main() {
+  const { positional, opts } = parseArgs(process.argv.slice(2));
+  const cmd = positional[0];
+
+  if (!cmd || cmd === "help" || cmd === "-h" || cmd === "--help") {
+    process.stdout.write(usage() + "\n");
+    return;
+  }
+
+  const url = opts.url;
+  const lang = opts.lang || "en";
+  const outputDir = opts["output-dir"] || path.join(os.homedir(), "Downloads");
+
+  const timestamps = Boolean(opts.timestamps);
+  const keepBrackets = Boolean(opts["keep-brackets"]);
+  const extra = opts.extra || [];
+
+  if (cmd === "transcript") {
+    await cmdTranscript({ url, lang, timestamps, keepBrackets, extra });
+    return;
+  }
+  if (cmd === "download") {
+    await cmdDownload({ url, outputDir, extra });
+    return;
+  }
+  if (cmd === "audio") {
+    await cmdAudio({ url, outputDir, extra });
+    return;
+  }
+  if (cmd === "subs") {
+    await cmdSubs({ url, lang, outputDir, extra });
+    return;
+  }
+  if (cmd === "formats") {
+    await cmdFormats({ url, extra });
+    return;
+  }
+
+  die(`unknown command: ${cmd}\n\n${usage()}`);
+}
+
+main().catch((e) => die(e?.stack || e?.message || String(e)));