Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,8 @@ from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph,
|
|
| 13 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 14 |
from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT
|
| 15 |
import time
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# ============= EXTRAER TEXTO DEL PDF =============
|
| 18 |
def extraer_texto_pdf(pdf_file):
|
|
@@ -25,81 +27,122 @@ def extraer_texto_pdf(pdf_file):
|
|
| 25 |
except Exception as e:
|
| 26 |
return f"Error: {str(e)}"
|
| 27 |
|
|
|
|
| 28 |
# ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
|
| 29 |
# ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
|
| 30 |
def generar_audio_respuesta(texto, client):
|
| 31 |
"""Convierte la respuesta de texto a audio usando modelos TTS con emoción"""
|
| 32 |
|
| 33 |
-
# Modelos TTS con soporte emocional y mejor calidad
|
| 34 |
-
modelos_tts = [
|
| 35 |
-
"microsoft/speecht5_tts", # Microsoft TTS - mejor compatibilidad
|
| 36 |
-
"facebook/mms-tts-spa", # Español
|
| 37 |
-
]
|
| 38 |
-
|
| 39 |
# Limitar y optimizar texto para TTS
|
| 40 |
-
texto_limpio = texto.replace("*", "").replace("#", "").replace("`", "").replace("€", "euros").strip()
|
| 41 |
|
| 42 |
-
# Dividir en oraciones
|
| 43 |
oraciones = re.split(r'[.!?]+', texto_limpio)
|
| 44 |
-
oraciones = [o.strip() for o in oraciones if o.strip()]
|
| 45 |
|
| 46 |
-
# Tomar máximo
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
-
#
|
| 52 |
-
if
|
| 53 |
-
texto_audio
|
| 54 |
|
| 55 |
-
print(f"🎤 Generando audio: {texto_audio[:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
for modelo in modelos_tts:
|
| 58 |
try:
|
| 59 |
-
print(f"🔊
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
|
| 63 |
text=texto_audio,
|
| 64 |
model=modelo
|
| 65 |
)
|
| 66 |
|
| 67 |
-
# Guardar
|
| 68 |
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
| 69 |
-
audio_path = f"
|
| 70 |
|
| 71 |
-
#
|
| 72 |
with open(audio_path, "wb") as f:
|
| 73 |
-
if isinstance(
|
| 74 |
-
f.write(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
else:
|
| 76 |
-
#
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
if isinstance(chunk, bytes)
|
| 80 |
-
f.write(chunk)
|
| 81 |
-
except:
|
| 82 |
-
# Si falla como generador, intentar como objeto
|
| 83 |
-
if hasattr(audio_response, 'content'):
|
| 84 |
-
f.write(audio_response.content)
|
| 85 |
-
else:
|
| 86 |
-
f.write(bytes(audio_response))
|
| 87 |
|
| 88 |
-
# Verificar
|
| 89 |
-
if os.path.exists(audio_path)
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
| 95 |
os.remove(audio_path)
|
| 96 |
-
continue
|
| 97 |
|
| 98 |
except Exception as e:
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
print("⚠️ No se pudo generar audio con ningún modelo")
|
| 103 |
return None
|
| 104 |
|
| 105 |
# ============= ASISTENTE IA CONVERSACIONAL =============
|
|
|
|
| 13 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 14 |
from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT
|
| 15 |
import time
|
| 16 |
+
import numpy as np
|
| 17 |
+
import wave
|
| 18 |
|
| 19 |
# ============= EXTRAER TEXTO DEL PDF =============
|
| 20 |
def extraer_texto_pdf(pdf_file):
|
|
|
|
| 27 |
except Exception as e:
|
| 28 |
return f"Error: {str(e)}"
|
| 29 |
|
| 30 |
+
# ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
|
| 31 |
# ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
|
| 32 |
# ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
|
| 33 |
def _preparar_texto_audio(texto, max_oraciones=2, max_caracteres=300):
    """Return a short, TTS-friendly excerpt of *texto*, or "" if nothing is speakable."""
    if not texto:
        return ""

    # Strip markdown symbols and spell out the euro sign.
    texto_limpio = str(texto)
    for simbolo, reemplazo in (("*", ""), ("#", ""), ("`", ""), ("€", " euros")):
        texto_limpio = texto_limpio.replace(simbolo, reemplazo)
    texto_limpio = texto_limpio.strip()

    # Punctuation-only input would otherwise be sent to the TTS API as ".".
    if not any(caracter.isalnum() for caracter in texto_limpio):
        return ""

    # Split only on punctuation followed by whitespace or end of text, so
    # amounts such as "12.50" or "1.234,56" are not cut in half.
    fragmentos = [f.strip() for f in re.split(r'[.!?]+(?=\s|$)', texto_limpio)]
    fragmentos = [f for f in fragmentos if f]

    # Prefer substantial sentences; if every sentence is short, keep the
    # first one instead of ending up with nothing to say.
    oraciones = [f for f in fragmentos if len(f) > 10] or fragmentos[:1]
    if not oraciones:
        return ""

    texto_audio = ". ".join(oraciones[:max_oraciones]) + "."
    if len(texto_audio) > max_caracteres:
        texto_audio = texto_audio[:max_caracteres - 3] + "..."
    return texto_audio


def _escribir_audio(audio_data, audio_path):
    """Write a TTS response (bytes-like, file-like, `.content` holder or chunk iterable) to disk."""
    tipos_binarios = (bytes, bytearray, memoryview)
    with open(audio_path, "wb") as f:
        if isinstance(audio_data, tipos_binarios):
            f.write(audio_data)
        elif hasattr(audio_data, 'read'):
            f.write(audio_data.read())
        elif hasattr(audio_data, 'content'):
            f.write(audio_data.content)
        else:
            # Last resort: treat the response as an iterable of chunks.
            for chunk in audio_data:
                if chunk:
                    f.write(chunk if isinstance(chunk, tipos_binarios) else chunk.encode())


def _eliminar_si_existe(ruta):
    """Remove *ruta* if present; cleanup failures are reported, never raised."""
    try:
        if os.path.exists(ruta):
            os.remove(ruta)
    except OSError as e:
        print(f"⚠️ No se pudo eliminar {ruta}: {e}")


def _generar_tono_respaldo(audio_path):
    """Write a 2-second 440 Hz mono 16-bit WAV placeholder to *audio_path*."""
    import numpy as np
    import wave

    sample_rate = 22050
    duration = 2.0
    frequency = 440.0  # A4

    t = np.linspace(0, duration, int(sample_rate * duration))
    audio_wave = 0.3 * np.sin(2 * np.pi * frequency * t)

    # WAV PCM samples are little-endian; force it so the file is valid on
    # big-endian hosts too.
    audio_int = (audio_wave * 32767).astype('<i2')

    with wave.open(audio_path, 'w') as wav_file:
        wav_file.setnchannels(1)   # mono
        wav_file.setsampwidth(2)   # 2 bytes = 16 bits
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_int.tobytes())


def generar_audio_respuesta(texto, client):
    """Convert a text answer to an audio file using TTS models.

    The text is cleaned of markdown symbols, reduced to at most two
    sentences / 300 characters, and sent to each model in turn through
    ``client.text_to_speech(text=..., model=...)``. The first response that
    produces a file larger than 1000 bytes wins. If every model fails, a
    short 440 Hz tone is written as a placeholder instead.

    Args:
        texto: Text to speak. ``None``, empty, or punctuation-only text
            yields ``None`` without calling the client.
        client: Object exposing ``text_to_speech(text=..., model=...)``.
            Any exception it raises is logged and the next model is tried.

    Returns:
        Path of the generated ``.wav`` file (``audio_*`` for TTS output,
        ``audio_backup_*`` for the placeholder tone), or ``None`` when
        there is nothing to speak or even the placeholder could not be
        written.
    """
    texto_audio = _preparar_texto_audio(texto)
    if not texto_audio:
        print("⚠️ No hay texto para convertir a audio")
        return None

    print(f"🎤 Generando audio para: '{texto_audio[:80]}...'")

    # Models to try, in order of preference.
    modelos_tts = [
        "suno/bark-small",
        "facebook/fastspeech2-en-200_speaker-cv4",
        "microsoft/speecht5_tts",
    ]

    for modelo in modelos_tts:
        # Microseconds keep two calls within the same second from colliding.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        audio_path = f"audio_{timestamp}.wav"
        try:
            print(f"🔊 Probando modelo: {modelo}")

            audio_data = client.text_to_speech(
                text=texto_audio,
                model=modelo
            )
            _escribir_audio(audio_data, audio_path)

            size = os.path.getsize(audio_path)
            print(f"📁 Archivo creado: {audio_path} ({size} bytes)")

            if size > 1000:  # at least ~1 KB, otherwise assume a bad response
                print(f"✅ Audio generado exitosamente")
                return audio_path

            print(f"⚠️ Archivo muy pequeño ({size} bytes), eliminando...")

        except Exception as e:
            # Best effort: any failure with one model just moves on to the next.
            print(f"❌ Error con {modelo}: {str(e)[:100]}")

        # Reached only when this model did not succeed: drop any partial
        # or undersized file so failed attempts do not pile up on disk.
        _eliminar_si_existe(audio_path)

    print("⚠️ No se pudo generar audio con ningún modelo TTS")

    # PLAN B: a simple tone as a placeholder so the caller still gets a file.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    audio_path = f"audio_backup_{timestamp}.wav"
    try:
        print("🔄 Generando audio de respaldo (tono simple)...")
        _generar_tono_respaldo(audio_path)

        if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
            print(f"✅ Audio de respaldo generado: {audio_path}")
            return audio_path

    except Exception as e:
        print(f"❌ Error generando audio de respaldo: {str(e)}")

    _eliminar_si_existe(audio_path)
    return None
|
| 147 |
|
| 148 |
# ============= ASISTENTE IA CONVERSACIONAL =============
|