Initial commit with translated description
This commit is contained in:
49
SKILL.md
Normal file
49
SKILL.md
Normal file
@@ -0,0 +1,49 @@
|
||||
---
|
||||
name: local-whisper
|
||||
description: "使用OpenAI Whisper进行本地语音转文字。模型下载后可完全离线运行。具备多种模型尺寸的高质量转录。"
|
||||
metadata: {"clawdbot":{"emoji":"🎙️","requires":{"bins":["ffmpeg"]}}}
|
||||
---
|
||||
|
||||
# Local Whisper STT
|
||||
|
||||
Local speech-to-text using OpenAI's Whisper. **Fully offline** after initial model download.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Basic
|
||||
~/.clawdbot/skills/local-whisper/scripts/local-whisper audio.wav
|
||||
|
||||
# Better model
|
||||
~/.clawdbot/skills/local-whisper/scripts/local-whisper audio.wav --model turbo
|
||||
|
||||
# With timestamps
|
||||
~/.clawdbot/skills/local-whisper/scripts/local-whisper audio.wav --timestamps --json
|
||||
```
|
||||
|
||||
## Models
|
||||
|
||||
| Model | Size | Notes |
|
||||
|-------|------|-------|
|
||||
| `tiny` | 39M | Fastest |
|
||||
| `base` | 74M | **Default** |
|
||||
| `small` | 244M | Good balance |
|
||||
| `turbo` | 809M | Best speed/quality |
|
||||
| `large-v3` | 1.5GB | Maximum accuracy |
|
||||
|
||||
## Options
|
||||
|
||||
- `--model/-m` — Model size (default: base)
|
||||
- `--language/-l` — Language code (auto-detect if omitted)
|
||||
- `--timestamps/-t` — Include word timestamps
|
||||
- `--json/-j` — JSON output
|
||||
- `--quiet/-q` — Suppress progress
|
||||
|
||||
## Setup
|
||||
|
||||
Uses uv-managed venv at `.venv/`. To reinstall:
|
||||
```bash
|
||||
cd ~/.clawdbot/skills/local-whisper
|
||||
uv venv .venv --python 3.12
|
||||
uv pip install --python .venv/bin/python click openai-whisper torch --index-url https://download.pytorch.org/whl/cpu
|
||||
```
|
||||
6
_meta.json
Normal file
6
_meta.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"ownerId": "kn74rm3nhtpv387m12frad6bws7z5kqr",
|
||||
"slug": "local-whisper",
|
||||
"version": "1.0.0",
|
||||
"publishedAt": 1769159934671
|
||||
}
|
||||
70
scripts/transcribe.py
Normal file
70
scripts/transcribe.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Local speech-to-text using OpenAI Whisper (runs offline after model download)."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
import click
|
||||
|
||||
# Silence noisy library warnings (whisper/torch emit FutureWarnings on load).
warnings.filterwarnings("ignore")

# Whisper checkpoint names accepted by --model; ".en" variants are English-only.
MODELS = [
    "tiny", "tiny.en",
    "base", "base.en",
    "small", "small.en",
    "medium", "medium.en",
    "large-v3", "turbo",
]
|
||||
|
||||
|
||||
@click.command()
@click.argument("audio_file", type=click.Path(exists=True))
@click.option("-m", "--model", default="base", type=click.Choice(MODELS), help="Whisper model size")
@click.option("-l", "--language", default=None, help="Language code (auto-detect if omitted)")
@click.option("-t", "--timestamps", is_flag=True, help="Include word-level timestamps")
@click.option("-j", "--json", "as_json", is_flag=True, help="Output as JSON")
@click.option("-q", "--quiet", is_flag=True, help="Suppress progress messages")
def main(audio_file, model, language, timestamps, as_json, quiet):
    """Transcribe audio using OpenAI Whisper (local)."""

    def fail(message):
        # Report a fatal error on stderr and exit non-zero.
        click.echo(message, err=True)
        sys.exit(1)

    def progress(message):
        # Status chatter goes to stderr so stdout carries only the transcript.
        if not quiet:
            click.echo(message, err=True)

    # Import lazily so the CLI can print a clear message when the package is absent.
    try:
        import whisper
    except ImportError:
        fail("Error: openai-whisper not installed")

    progress(f"Loading model: {model}...")
    try:
        engine = whisper.load_model(model)
    except Exception as e:
        fail(f"Error loading model: {e}")

    progress(f"Transcribing: {audio_file}...")
    try:
        result = engine.transcribe(
            audio_file,
            language=language,
            word_timestamps=timestamps,
            verbose=False,
        )
    except Exception as e:
        fail(f"Error transcribing: {e}")

    text = result["text"].strip()
    # Segment detail is only surfaced when the caller asked for timestamps.
    has_segments = timestamps and "segments" in result

    if as_json:
        payload = {"text": text, "language": result.get("language", "unknown")}
        if has_segments:
            entries = []
            for s in result["segments"]:
                entry = {"start": s["start"], "end": s["end"], "text": s["text"]}
                if "words" in s:
                    entry["words"] = s["words"]
                entries.append(entry)
            payload["segments"] = entries
        click.echo(json.dumps(payload, indent=2, ensure_ascii=False))
    else:
        click.echo(text)
        if has_segments:
            # Human-readable segment listing goes to stderr, keeping stdout clean.
            click.echo("\n--- Segments ---", err=True)
            for seg in result["segments"]:
                click.echo(f"  [{seg['start']:.2f}s - {seg['end']:.2f}s]: {seg['text']}", err=True)
|
||||
|
||||
|
||||
# Script entry point: click parses argv and dispatches to main().
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user