Spaces:
Running
Running
File size: 6,244 Bytes
0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 58ca1d0 0491e54 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
"""
ElevenLabs Voice Integration for FocusFlow.
Provides optional voice feedback for focus agent and Pomodoro timer.
Gracefully falls back to text-only mode if API key is missing or quota exceeded.
"""
import os
import tempfile
from typing import Optional, Dict
from pathlib import Path
class VoiceGenerator:
    """
    Handles text-to-speech generation using the ElevenLabs API.

    Designed for graceful degradation: every public method returns ``None``
    (or an "unavailable"/"error" status dict) instead of raising when voice
    output cannot be produced.

    Attributes set by ``initialize()``:
        client: ElevenLabs client instance, or None when unavailable.
        available: True only after the client was created successfully.
        voice_id: ElevenLabs voice identifier used for every request.
        model_id: ElevenLabs model identifier used for every request.
    """

    def __init__(self):
        """Initialize the ElevenLabs client if an API key is available."""
        self.initialize()

    def initialize(self):
        """
        Initialize or re-initialize the client.

        Reads ``DEMO_ELEVEN_API_KEY`` first and falls back to
        ``ELEVEN_API_KEY``. On any failure (missing key, missing package,
        client construction error) a message is printed and the instance is
        left in text-only mode (``available`` False, ``client`` None).
        """
        self.client = None
        self.available = False
        self.voice_id = "JBFqnCBsd6RMkjVDRZzb"  # George - friendly, clear voice
        self.model_id = "eleven_turbo_v2_5"  # Fast, low-latency model

        try:
            # Check for API key (demo key first, then user key). The demo key
            # is read once so the reported key type always matches the key
            # that was actually used.
            demo_key = os.getenv("DEMO_ELEVEN_API_KEY")
            api_key = demo_key or os.getenv("ELEVEN_API_KEY")

            if not api_key:
                print("ℹ️ ElevenLabs: No API key found. Voice feedback disabled (text-only mode).")
                return

            # Imported lazily so the module still loads when the optional
            # dependency is not installed.
            from elevenlabs.client import ElevenLabs

            self.client = ElevenLabs(api_key=api_key)
            self.available = True

            key_type = "demo" if demo_key else "user"
            print(f"✅ ElevenLabs voice initialized ({key_type} key)")

        except ImportError:
            print("⚠️ ElevenLabs: Package not installed. Run: pip install elevenlabs")
        except Exception as e:
            print(f"⚠️ ElevenLabs: Initialization failed: {e}")

    def text_to_speech(self, text: str, emotion: str = "neutral") -> Optional[str]:
        """
        Convert text to speech and return the path to a temporary audio file.

        Args:
            text: Text to convert to speech. Empty or whitespace-only text is
                rejected without calling the API.
            emotion: Emotion hint (not used in current implementation).

        Returns:
            Path to a temporary MP3 file, or None if voice is disabled,
            unavailable, the text is empty, or generation fails. The file is
            created with ``delete=False``; the caller owns its cleanup.
        """
        # Check if voice is enabled globally
        if os.getenv("VOICE_ENABLED", "true").strip().lower() == "false":
            return None

        if not self.available or not self.client:
            return None

        # Nothing to speak: avoid spending API quota on an empty request.
        if not text or not text.strip():
            return None

        temp_path = None
        try:
            # Generate audio using ElevenLabs API
            audio = self.client.text_to_speech.convert(
                text=text,
                voice_id=self.voice_id,
                model_id=self.model_id,
                output_format="mp3_44100_128",
            )

            # Convert generator/stream to bytes. A plain bytes response is
            # accepted as-is, since joining over bytes would iterate ints.
            if isinstance(audio, (bytes, bytearray)):
                audio_bytes = bytes(audio)
            else:
                audio_bytes = b"".join(audio)

            # An empty stream would otherwise yield a zero-byte "MP3".
            if not audio_bytes:
                print("⚠️ ElevenLabs: TTS failed: empty audio response")
                return None

            # Save to temporary file (Gradio expects file path, not data URL).
            # The context manager guarantees the handle is closed even if the
            # write fails; delete=False keeps the file for the caller.
            with tempfile.NamedTemporaryFile(
                delete=False,
                suffix=".mp3",
                prefix="focusflow_voice_",
            ) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_bytes)

            return temp_path

        except Exception as e:
            # Graceful degradation - log error but don't crash
            print(f"⚠️ ElevenLabs: TTS failed: {e}")
            if temp_path is not None:
                # Don't leave a partially written file behind.
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass  # best-effort cleanup; the original error was logged
            return None

    def get_focus_message_audio(self, verdict: str, message: str) -> Optional[str]:
        """
        Generate voice feedback for focus check results.

        Args:
            verdict: "On Track", "Distracted", or "Idle". Any other value
                maps to the "neutral" emotion.
            message: Text message to speak.

        Returns:
            Path to temporary audio file or None.
        """
        if not self.available:
            return None

        # Add emotion/tone based on verdict (for future voice modulation)
        emotion_map = {
            "On Track": "cheerful",
            "Distracted": "concerned",
            "Idle": "motivating",
        }
        emotion = emotion_map.get(verdict, "neutral")

        return self.text_to_speech(message, emotion=emotion)

    def get_pomodoro_audio(self, event_type: str) -> Optional[str]:
        """
        Generate voice alerts for Pomodoro timer events.

        Args:
            event_type: "work_complete" or "break_complete". Any other value
                produces the generic "Timer complete!" message.

        Returns:
            Path to temporary audio file or None.
        """
        if not self.available:
            return None

        messages = {
            "work_complete": "Great work! Time for a 5-minute break. You've earned it!",
            "break_complete": "Break's over! Let's get back to work and stay focused!",
        }
        message = messages.get(event_type, "Timer complete!")

        return self.text_to_speech(message, emotion="cheerful")

    def test_voice(self) -> Dict[str, Optional[str]]:
        """
        Test voice generation (for setup/debugging).

        Returns:
            Dict with keys "status" ("unavailable", "success" or "error"),
            "message" (human-readable description) and "audio" (path to the
            generated file, or None).
        """
        if not self.available:
            return {
                "status": "unavailable",
                "message": "Voice not available (no API key or initialization failed)",
                "audio": None,
            }

        try:
            test_message = "Hello! FocusFlow voice is working perfectly!"
            audio = self.text_to_speech(test_message)
        except Exception as e:
            return {
                "status": "error",
                "message": f"Voice test failed: {str(e)}",
                "audio": None,
            }

        if audio:
            return {
                "status": "success",
                "message": "Voice test successful!",
                "audio": audio,
            }
        return {
            "status": "error",
            "message": "Voice generation failed",
            "audio": None,
        }
# Global voice generator instance, created once at import time. Constructing it
# runs VoiceGenerator.initialize(), so importing this module reads the API-key
# environment variables and prints the initialization status.
voice_generator = VoiceGenerator()
def get_voice_status() -> str:
    """
    Describe the current voice state for display in the UI.

    Returns:
        "✅ ElevenLabs Voice Enabled" when the module-level voice generator
        reports itself available, otherwise
        "ℹ️ Voice Disabled (text-only mode)".
    """
    enabled_label = "✅ ElevenLabs Voice Enabled"
    disabled_label = "ℹ️ Voice Disabled (text-only mode)"
    return enabled_label if voice_generator.available else disabled_label
|