71 lines
2.5 KiB
Python
71 lines
2.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Local speech-to-text using OpenAI Whisper (runs offline after model download)."""
|
|
|
|
import json
|
|
import sys
|
|
import warnings
|
|
|
|
import click
|
|
|
|
# Install a blanket "ignore" filter so no Python warnings are displayed.
warnings.filterwarnings("ignore")

# Model names offered as choices for the --model option, smallest first.
MODELS = [
    "tiny", "tiny.en",
    "base", "base.en",
    "small", "small.en",
    "medium", "medium.en",
    "large-v3",
    "turbo",
]
|
|
|
|
|
|
@click.command()
@click.argument("audio_file", type=click.Path(exists=True, dir_okay=False))
@click.option("-m", "--model", default="base", type=click.Choice(MODELS), help="Whisper model size")
@click.option("-l", "--language", default=None, help="Language code (auto-detect if omitted)")
@click.option("-t", "--timestamps", is_flag=True, help="Include word-level timestamps")
@click.option("-j", "--json", "as_json", is_flag=True, help="Output as JSON")
@click.option("-q", "--quiet", is_flag=True, help="Suppress progress messages")
def main(audio_file, model, language, timestamps, as_json, quiet):
    """Transcribe audio using OpenAI Whisper (local)."""
    # NOTE: the docstring above doubles as the --help text shown by click, so
    # it is kept to a single line; the contract is documented here instead.
    #
    # Parameters (all supplied by click from the command line):
    #   audio_file  path to an existing file (directories are rejected by click)
    #   model       one of MODELS, passed to whisper.load_model
    #   language    language code, or None to let whisper pick one
    #   timestamps  request word timestamps and emit segment information
    #   as_json     print one JSON document instead of plain text
    #   quiet       suppress progress output on stderr
    #
    # Output contract:
    #   stdout  the transcript only (plain text, or a JSON document with --json)
    #   stderr  progress messages, errors, and in text mode with --timestamps
    #           the per-segment listing
    #   exit 1  whisper is not installed, the model fails to load, or
    #           transcription fails
    import contextlib

    # Imported inside the command so a missing package yields a short error
    # message rather than an import traceback.
    try:
        import whisper
    except ImportError:
        click.echo("Error: openai-whisper not installed", err=True)
        sys.exit(1)

    if not quiet:
        click.echo(f"Loading model: {model}...", err=True)

    try:
        whisper_model = whisper.load_model(model)
    except Exception as e:
        click.echo(f"Error loading model: {e}", err=True)
        sys.exit(1)

    if not quiet:
        click.echo(f"Transcribing: {audio_file}...", err=True)

    # Per whisper's transcribe() documentation, verbose=False still shows
    # minimal details (its progress bar) while verbose=None displays nothing,
    # so --quiet has to map to None to actually be quiet.
    # NOTE(review): confirm against the installed whisper version.
    whisper_verbosity = None if quiet else False

    try:
        # whisper may print() diagnostics (e.g. the detected language) while
        # transcribing; route anything it writes to stdout over to stderr so
        # stdout carries only the transcript / JSON document emitted below.
        with contextlib.redirect_stdout(sys.stderr):
            result = whisper_model.transcribe(
                audio_file,
                language=language,
                word_timestamps=timestamps,
                verbose=whisper_verbosity,
            )
    except Exception as e:
        click.echo(f"Error transcribing: {e}", err=True)
        sys.exit(1)

    text = result["text"].strip()

    if as_json:
        output = {"text": text, "language": result.get("language", "unknown")}
        if timestamps and "segments" in result:
            # Keep only the stable fields of each segment; "words" is present
            # only when whisper attached word-level timing to that segment.
            output["segments"] = [
                {
                    "start": s["start"],
                    "end": s["end"],
                    "text": s["text"],
                    **({"words": s["words"]} if "words" in s else {}),
                }
                for s in result["segments"]
            ]
        click.echo(json.dumps(output, indent=2, ensure_ascii=False))
    else:
        click.echo(text)
        if timestamps and "segments" in result:
            # The segment listing goes to stderr so that stdout remains the
            # bare transcript when the command is piped.
            click.echo("\n--- Segments ---", err=True)
            for seg in result["segments"]:
                click.echo(f"  [{seg['start']:.2f}s - {seg['end']:.2f}s]: {seg['text']}", err=True)
|
|
|
|
|
|
# Script entry point: run the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|