Files
jarvis/core/voice_input.py
2026-03-24 00:11:34 -05:00

36 lines
1022 B
Python

"""
Voice input — transcribes raw audio bytes using OpenAI Whisper (local).
Model size set via WHISPER_MODEL env var (default: base).
"""
import os
import tempfile
import asyncio
import whisper
# Module-level cache for the Whisper model; stays None until the first
# call to _get_model() populates it.
_model = None


def _get_model():
    """Return the cached Whisper model, loading it on first use.

    The model size is read from the ``WHISPER_MODEL`` environment variable
    and falls back to ``"base"`` when the variable is not set. The loaded
    model is stored in the module-level ``_model`` so later calls return
    the same object without loading again.
    """
    global _model
    # Fast path: a previous call already loaded the model.
    if _model is not None:
        return _model
    size_name = os.environ.get("WHISPER_MODEL", "base")
    print(f"[JARVIS] Loading Whisper model: {size_name}")
    _model = whisper.load_model(size_name)
    return _model
async def transcribe_audio(audio_bytes: bytes) -> str:
    """Transcribe audio bytes to text using the local Whisper model.

    The bytes are written verbatim to a temporary ``.wav`` file whose path
    is passed to ``model.transcribe``. The transcription runs on the event
    loop's default executor, and the temporary file is removed afterwards
    whether or not writing or transcription succeeds.

    Args:
        audio_bytes: Audio payload to transcribe.

    Returns:
        The ``"text"`` entry of the transcription result with leading and
        trailing whitespace stripped, or ``""`` when ``audio_bytes`` is
        empty.
    """
    # Nothing to transcribe: skip model loading and temp-file I/O entirely.
    if not audio_bytes:
        return ""

    # NOTE(review): _get_model() runs here, on the event-loop thread, so the
    # first call blocks the loop while the model loads. Left as-is so that
    # concurrent callers cannot trigger parallel loads from worker threads.
    model = _get_model()
    loop = asyncio.get_running_loop()

    def _transcribe() -> str:
        # delete=False because the file is reopened by path after we close
        # it; cleanup is therefore done by hand in the finally clause.
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        tmp_path = tmp.name
        try:
            # The with-block closes the handle even if write() fails, so the
            # unlink below also covers a failed write.
            with tmp:
                tmp.write(audio_bytes)
            result = model.transcribe(tmp_path, fp16=False)
            return result["text"].strip()
        finally:
            os.unlink(tmp_path)

    return await loop.run_in_executor(None, _transcribe)