Spaces:

gnosticdev
/

INVIDEO_BASIC

Running

App Files Community

gnosticdev committed on Aug 18

Commit

9774fb0

verified ·

1 Parent(s): 9387d2c

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -47

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ import gradio as gr
 import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 from keybert import KeyBERT
 # Importación correcta: Solo 'concatenate_videoclips'
 from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip, concatenate_audioclips, AudioClip
 import re
@@ -175,67 +177,54 @@ def generate_script(prompt, max_length=150):
         logger.warning("Usando prompt original como guion debido al error de generación.")
         return prompt.strip()
-# Función TTS con voz especificada
 async def text_to_speech(text, output_path, voice):
     logger.info(f"Convirtiendo texto a voz | Caracteres: {len(text)} | Voz: {voice} | Salida: {output_path}")
     if not text or not text.strip():
         logger.warning("Texto vacío para TTS")
         return False
-    # Lista extendida de voces de respaldo
-    backup_voices = [
-        "es-ES-JuanNeural",
-        "es-ES-ElviraNeural",
-        "es-ES-AlvaroNeural",
-        "es-MX-DaliaNeural",
-        "es-AR-ElenaNeural"
-    ]
-    # Configuración de reintentos con espera exponencial
-    max_retries = 5
-    base_delay = 2  # segundos
-    for attempt in range(max_retries):
-        current_voice = backup_voices[attempt % len(backup_voices)]
-        if attempt > 0:
-            delay = base_delay * (2 ** (attempt - 1))  # Espera exponencial
-            logger.warning(f"Esperando {delay} segundos antes del reintento {attempt + 1}/{max_retries} con voz {current_voice}")
-            await asyncio.sleep(delay)
-        try:
-            # Crear el objeto Communicate con headers personalizados
-            communicate = edge_tts.Communicate(text, current_voice)
-            # Modificar los headers para incluir un User-Agent personalizado
-            communicate._headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, como Gecko) Chrome/91.0.4472.124 Safari/537.36'
-            }
             await communicate.save(output_path)
             if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
-                logger.info(f"Audio guardado exitosamente en: {output_path} | Tamaño: {os.path.getsize(output_path)} bytes")
                 return True
             else:
-                logger.error(f"TTS guardó un archivo pequeño o vacío en: {output_path}")
-        except Exception as e:
-            logger.error(f"Error en TTS con voz '{current_voice}': {str(e)}")
-            # Manejo específico para error 403
-            if "403" in str(e) or "Forbidden" in str(e):
-                logger.error("Error 403 detectado - Posible bloqueo temporal")
-                # Espera adicional para errores 403
-                if attempt < max_retries - 1:
-                    await asyncio.sleep(base_delay * 3)
-            elif "timeout" in str(e).lower():
-                logger.error("Timeout detectado - Reintentando")
-            elif "connection" in str(e).lower():
-                logger.error("Error de conexión detectado - Reintentando")
-    logger.error("Todos los intentos de TTS fallaron")
-    return False
 def download_video_file(url, temp_dir):
     if not url:

 import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 from keybert import KeyBERT
+from TTS.api import TTS
 # Importación correcta: Solo 'concatenate_videoclips'
 from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip, concatenate_audioclips, AudioClip
 import re
         logger.warning("Usando prompt original como guion debido al error de generación.")
         return prompt.strip()
 async def text_to_speech(text, output_path, voice):
+    global tts_model
     logger.info(f"Convirtiendo texto a voz | Caracteres: {len(text)} | Voz: {voice} | Salida: {output_path}")
     if not text or not text.strip():
         logger.warning("Texto vacío para TTS")
         return False
+    try:
+        # Inicializar el modelo TTS si no está cargado
+        if tts_model is None:
+            logger.info("Inicializando modelo Coqui TTS...")
+            tts_model = TTS(model_name="tts_models/es/mai/vits", progress_bar=False)
+            logger.info("Modelo Coqui TTS cargado exitosamente")
+        # Generar el audio
+        logger.info("Generando audio con Coqui TTS...")
+        tts_model.tts_to_file(
+            text=text,
+            speaker=tts_model.speakers[0] if tts_model.speakers else None,
+            file_path=output_path
+        )
+        # Verificar que el archivo se creó correctamente
+        if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
+            logger.info(f"Audio guardado exitosamente en: {output_path} | Tamaño: {os.path.getsize(output_path)} bytes")
+            return True
+        else:
+            logger.error(f"TTS guardó un archivo pequeño o vacío en: {output_path}")
+            return False
+    except Exception as e:
+        logger.error(f"Error en TTS con Coqui: {str(e)}", exc_info=True)
+        # Si falla Coqui TTS, intentar con Edge TTS como último recurso
+        logger.warning("Intentando con Edge TTS como respaldo...")
+        try:
+            communicate = edge_tts.Communicate(text, voice)
             await communicate.save(output_path)
             if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
+                logger.info(f"Audio guardado exitosamente con Edge TTS: {output_path}")
                 return True
             else:
+                logger.error(f"Edge TTS guardó un archivo pequeño o vacío: {output_path}")
+                return False
+        except Exception as e2:
+            logger.error(f"Error en TTS con Edge TTS: {str(e2)}")
+            return False
 def download_video_file(url, temp_dir):
     if not url: