Files
jarvis/core/voice_output.py
2026-03-24 00:11:34 -05:00

28 lines
795 B
Python

"""
Voice output — converts text to speech using Fish Audio TTS API.
"""
import os
import asyncio
from fish_audio_sdk import Session, TTSRequest
async def speak(text: str) -> bytes:
    """
    Convert text to speech audio bytes using Fish Audio TTS.

    The Fish Audio ``Session.tts`` call used here is a blocking iterator, so
    the request is run in the event loop's default executor to keep the loop
    free while audio is being fetched.

    Configuration is read from the environment on every call:
        FISH_AUDIO_API_KEY  -- required; passed to ``Session(apikey=...)``.
        FISH_AUDIO_VOICE_ID -- optional; passed as ``TTSRequest.reference_id``
                               (``None`` when unset).

    Args:
        text: The text to synthesize.

    Returns:
        The audio chunks yielded by ``session.tts`` joined into one ``bytes``
        object (MP3 per the original author's note; the format is not set
        explicitly here). Returns ``b""`` without contacting the service
        when ``text`` is empty or whitespace-only.

    Raises:
        RuntimeError: If ``FISH_AUDIO_API_KEY`` is unset or empty.
    """
    # Nothing to say: skip the network round-trip entirely.
    if not text or not text.strip():
        return b""

    api_key = os.getenv("FISH_AUDIO_API_KEY")
    if not api_key:
        # Fail fast with a clear message instead of letting the SDK send a
        # request with a missing credential and fail with an opaque error.
        raise RuntimeError(
            "FISH_AUDIO_API_KEY environment variable is not set"
        )
    voice_id = os.getenv("FISH_AUDIO_VOICE_ID")

    def _tts() -> bytes:
        # Runs in a worker thread; the session is closed on exit.
        with Session(apikey=api_key) as session:
            request = TTSRequest(text=text, reference_id=voice_id)
            return b"".join(session.tts(request))

    # get_running_loop() is the preferred way to obtain the loop from inside
    # a coroutine; it never creates a new loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _tts)