""" Voice input — transcribes raw audio bytes using OpenAI Whisper (local). Model size set via WHISPER_MODEL env var (default: base). """ import os import tempfile import asyncio import whisper _model = None def _get_model(): global _model if _model is None: model_size = os.getenv("WHISPER_MODEL", "base") print(f"[JARVIS] Loading Whisper model: {model_size}") _model = whisper.load_model(model_size) return _model async def transcribe_audio(audio_bytes: bytes) -> str: """Transcribe raw WAV audio bytes to text using Whisper.""" model = _get_model() loop = asyncio.get_event_loop() def _transcribe(): with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f: f.write(audio_bytes) tmp_path = f.name try: result = model.transcribe(tmp_path, fp16=False) return result["text"].strip() finally: os.unlink(tmp_path) return await loop.run_in_executor(None, _transcribe)