# jarvis/utils/audio_utils.py

"""
Audio utilities — microphone capture helpers using PyAudio.
Captures audio from the default input device and returns WAV bytes.
"""
import io
import wave
import pyaudio

# Capture settings shared by the recording helpers in this module.
CHUNK = 1024  # frames requested per stream read / PortAudio buffer size
FORMAT = pyaudio.paInt16  # sample format: 16-bit signed integer PCM
CHANNELS = 1  # mono capture
RATE = 16000  # sample rate in Hz

def record_audio(duration: float = 5.0) -> bytes:
    """
    Record audio from the default microphone for the given duration.

    The capture uses the module-level FORMAT / CHANNELS / RATE settings and
    is returned as an in-memory WAV file (header + PCM frames), suitable for
    passing to Whisper.

    Args:
        duration: Length of the recording in seconds. Values <= 0 produce a
            valid WAV containing no audio frames.

    Returns:
        The complete WAV file contents as bytes.

    Raises:
        OSError: Propagated from PyAudio if the input stream cannot be
            opened or read. The stream and PortAudio session are released
            before the exception leaves this function.
    """
    # Count frames rather than whole chunks so the recording is not cut
    # short by up to one chunk (and short durations do not record nothing).
    total_frames = max(0, int(RATE * duration))

    frames = []
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PortAudio session is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(
            format=FORMAT, channels=CHANNELS,
            rate=RATE, input=True, frames_per_buffer=CHUNK
        )
        try:
            remaining = total_frames
            while remaining > 0:
                count = min(CHUNK, remaining)
                # Overflowed input buffers are tolerated rather than raised.
                frames.append(
                    stream.read(count, exception_on_overflow=False)
                )
                remaining -= count
        finally:
            # Always release the stream, even if a read failed part-way.
            try:
                stream.stop_stream()
            finally:
                stream.close()
    finally:
        p.terminate()

    buf = io.BytesIO()
    with wave.open(buf, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    return buf.getvalue()

def list_audio_devices() -> list[dict]:
    """
    List all available audio input devices.

    Returns:
        One dict per device that reports at least one input channel, with
        keys 'index' (the PyAudio device index) and 'name' (the device name
        as reported by PyAudio).
    """
    p = pyaudio.PyAudio()
    try:
        infos = (
            p.get_device_info_by_index(i)
            for i in range(p.get_device_count())
        )
        return [
            {'index': i, 'name': info['name']}
            for i, info in enumerate(infos)
            # Output-only devices report zero input channels; skip them.
            if info['maxInputChannels'] > 0
        ]
    finally:
        # Release the PortAudio session even if a device query fails.
        p.terminate()