Spaces:

gnosticdev
/

INVIDEO_BASIC

Running

App Files Community

gnosticdev committed on Jul 13

Commit

744ab6c

verified ·

1 Parent(s): 38d44ea

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -69

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import gradio as gr
 import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 from keybert import KeyBERT
-# Importación correcta: Solo 'concatenate_videoclips'
 from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip, concatenate_audioclips, AudioClip
 import re
 import math
@@ -32,6 +32,7 @@ logger.info("INICIO DE EJECUCIÓN - GENERADOR DE VIDEOS")
 logger.info("="*80)
 # Diccionario de voces TTS disponibles organizadas por idioma
 VOCES_DISPONIBLES = {
     "Español (España)": {
         "es-ES-JuanNeural": "Juan (España) - Masculino",
@@ -99,9 +100,32 @@ def get_voice_choices():
     choices = []
     for region, voices in VOCES_DISPONIBLES.items():
         for voice_id, voice_name in voices.items():
-            choices.append((voice_name, voice_id))
     return choices
 # Clave API de Pexels
 PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY")
 if not PEXELS_API_KEY:
@@ -200,53 +224,63 @@ def generate_script(prompt, max_length=150):
         text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         cleaned_text = text.strip()
         try:
-            instruction_end_idx = text.find(instruction_phrase)
-            if instruction_end_idx != -1:
-                cleaned_text = text[instruction_end_idx + len(instruction_phrase):].strip()
-                logger.debug("Instrucción inicial encontrada y eliminada del guión generado.")
             else:
                  instruction_start_idx = text.find(instruction_phrase_start)
                  if instruction_start_idx != -1:
-                     prompt_in_output_idx = text.find(prompt, instruction_start_idx)
-                     if prompt_in_output_idx != -1:
-                          cleaned_text = text[prompt_in_output_idx + len(prompt):].strip()
-                          logger.debug("Instrucción base y prompt encontrados y eliminados del guión generado.")
-                     else:
-                          cleaned_text = text[instruction_start_idx + len(instruction_phrase_start):].strip()
-                          logger.debug("Instrucción base encontrada, eliminada del guión generado (sin prompt detectado).")
         except Exception as e:
              logger.warning(f"Error durante la limpieza heurística del guión de IA: {e}. Usando texto generado sin limpieza adicional.")
-             cleaned_text = re.sub(r'<[^>]+>', '', text).strip()
-        if not cleaned_text or len(cleaned_text) < 10:
-             logger.warning("El guión generado parece muy corto o vacío después de la limpieza. Usando el texto generado original (sin limpieza heurística).")
-             cleaned_text = re.sub(r'<[^>]+>', '', text).strip()
         cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text).strip()
-        cleaned_text = cleaned_text.lstrip(':').strip()
-        cleaned_text = cleaned_text.lstrip('.').strip()
         sentences = cleaned_text.split('.')
         if sentences and sentences[0].strip():
             final_text = sentences[0].strip() + '.'
-            if len(sentences) > 1 and sentences[1].strip() and len(final_text.split()) < max_length * 0.7:
                  final_text += " " + sentences[1].strip() + "."
-                 final_text = final_text.replace("..", ".")
             logger.info(f"Guion generado final (Truncado a 100 chars): '{final_text[:100]}...'")
             return final_text.strip()
         logger.info(f"Guion generado final (sin oraciones completas detectadas - Truncado): '{cleaned_text[:100]}...'")
-        return cleaned_text.strip()
     except Exception as e:
         logger.error(f"Error generando guion con GPT-2 (fuera del bloque de limpieza): {str(e)}", exc_info=True)
         logger.warning("Usando prompt original como guion debido al error de generación.")
         return prompt.strip()
-# Función TTS con voz especificada
 async def text_to_speech(text, output_path, voice):
     logger.info(f"Convirtiendo texto a voz | Caracteres: {len(text)} | Voz: {voice} | Salida: {output_path}")
     if not text or not text.strip():
@@ -417,6 +451,7 @@ def extract_visual_keywords_from_script(script_text):
     logger.info(f"Palabras clave finales: {top_keywords}")
     return top_keywords
 def crear_video(prompt_type, input_text, selected_voice, musica_file=None):
     logger.info("="*80)
     logger.info(f"INICIANDO CREACIÓN DE VIDEO | Tipo: {prompt_type}")
@@ -452,35 +487,40 @@ def crear_video(prompt_type, input_text, selected_voice, musica_file=None):
         logger.info(f"Directorio temporal intermedio creado: {temp_dir_intermediate}")
         temp_intermediate_files = []
-        # 2. Generar audio de voz con reintentos y voz de respaldo
         logger.info("Generando audio de voz...")
         voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
-        primary_voice = selected_voice
-        fallback_voice = "es-ES-ElviraNeural" if selected_voice != "es-ES-ElviraNeural" else "es-ES-JuanNeural"
         tts_success = False
-        retries = 3
-        for attempt in range(retries):
-            current_voice = primary_voice if attempt == 0 else fallback_voice
-            if attempt > 0: logger.warning(f"Reintentando TTS ({attempt+1}/{retries})...")
-            logger.info(f"Intentando TTS con voz: {current_voice}")
             try:
                 tts_success = asyncio.run(text_to_speech(guion, voz_path, voice=current_voice))
                 if tts_success:
-                    logger.info(f"TTS exitoso en intento {attempt + 1} con voz {current_voice}.")
-                    break
             except Exception as e:
-                 pass
-            if not tts_success and attempt == 0 and primary_voice != fallback_voice:
-                 logger.warning(f"Fallo con voz {primary_voice}, intentando voz de respaldo: {fallback_voice}")
-            elif not tts_success and attempt < retries - 1:
-                 logger.warning(f"Fallo con voz {current_voice}, reintentando...")
         if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
-             logger.error(f"Fallo en la generación de voz después de {retries} intentos. Archivo de audio no creado o es muy pequeño.")
              raise ValueError("Error generando voz a partir del guion (fallo de TTS).")
         temp_intermediate_files.append(voz_path)
@@ -530,20 +570,6 @@ def crear_video(prompt_type, input_text, selected_voice, musica_file=None):
             except Exception as e:
                 logger.warning(f"Error buscando videos para '{keyword}': {str(e)}")
-        if len(videos_data) < total_desired_videos / 2:
-    logger.warning(f"Pocos videos encontrados ({len(videos_data)}). Intentando con palabras clave genéricas.")
-    generic_keywords = ["nature", "city", "background", "abstract"]
-    for keyword in generic_keywords:
-        if len(videos_data) >= total_desired_videos:
-            break
-        try:
-            videos = buscar_videos_pexels(keyword, PEXELS_API_KEY, per_page=2)
-            if videos:
-                videos_data.extend(videos)
-                logger.info(f"Encontrados {len(videos)} videos para '{keyword}' (genérico). Total data: {len(videos_data)}")
-        except Exception as e:
-            logger.warning(f"Error buscando videos para '{keyword}': {str(e)}")
         if len(videos_data) < total_desired_videos / 2:
             logger.warning(f"Pocos videos encontrados ({len(videos_data)}). Intentando con palabras clave genéricas.")
             generic_keywords = ["nature", "city", "background", "abstract"]
@@ -929,7 +955,7 @@ def crear_video(prompt_type, input_text, selected_voice, musica_file=None):
              logger.info(f"Directorio temporal intermedio {temp_dir_intermediate} persistirá para que Gradio lea el video final.")
-# CAMBIO CRÍTICO: run_app ahora recibe TODOS los inputs que Gradio le pasa desde el evento click
 def run_app(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice): # <-- Recibe el valor del Dropdown
     logger.info("="*80)
     logger.info("SOLICITUD RECIBIDA EN INTERFAZ")
@@ -947,10 +973,11 @@ def run_app(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
         return None, None, gr.update(value="⚠️ Por favor, ingresa texto para el guion o el tema.", interactive=False)
     # Validar la voz seleccionada. Si no es válida, usar la por defecto.
-    # AVAILABLE_VOICES se obtiene al inicio.
-    if selected_voice not in AVAILABLE_VOICES:
-        logger.warning(f"Voz seleccionada inválida o no encontrada en la lista: '{selected_voice}'. Usando voz por defecto: {DEFAULT_VOICE}.")
-        selected_voice = DEFAULT_VOICE
     else:
         logger.info(f"Voz seleccionada validada: {selected_voice}")
@@ -961,12 +988,12 @@ def run_app(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
         logger.info(f"Archivo de música recibido: {musica_file}")
     else:
         logger.info("No se proporcionó archivo de música.")
-    logger.info(f"Voz final a usar: {selected_voice}") # Loguear la voz final que se usará
     try:
         logger.info("Llamando a crear_video...")
-        # Pasar el input_text elegido, la voz seleccionada y el archivo de música a crear_video
-        video_path = crear_video(prompt_type, input_text, selected_voice, musica_file) # <-- PASAR selected_voice a crear_video
         if video_path and os.path.exists(video_path):
             logger.info(f"crear_video retornó path: {video_path}")
@@ -1038,8 +1065,8 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft(), css="
             # --- COMPONENTE: Selección de Voz ---
             voice_dropdown = gr.Dropdown(
                 label="Seleccionar Voz para Guion",
-                choices=AVAILABLE_VOICES,
-                value=DEFAULT_VOICE,
                 interactive=True
                  # visible=... <-- ¡NO DEBE ESTAR AQUÍ!
             )
@@ -1058,7 +1085,7 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft(), css="
             file_output = gr.File(
                 label="Descargar Archivo de Video",
                 interactive=False,
-                visible=False # <-- ESTÁ BIEN AQUÍ porque su visibilidad se controla por el último then()
                  # visible=... <-- ¡NO DEBE ESTAR AQUÍ si ya está visible=False arriba!
             )
             status_output = gr.Textbox(
@@ -1093,11 +1120,8 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft(), css="
         outputs=[video_output, file_output, status_output]
     ).then(
         # Acción 3 (síncrona): Hacer visible el enlace de descarga
-        # Recibe las salidas de la Acción 2
         lambda video_path, file_path, status_msg: gr.update(visible=file_path is not None),
-        # Inputs para esta lambda son los outputs del .then() anterior
         inputs=[video_output, file_output, status_output],
-        # Actualizamos la visibilidad del componente file_output
         outputs=[file_output]
     )

 import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 from keybert import KeyBERT
+# Importación correcta
 from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip, concatenate_audioclips, AudioClip
 import re
 import math
 logger.info("="*80)
 # Diccionario de voces TTS disponibles organizadas por idioma
+# Puedes expandir esta lista si conoces otros IDs de voz de Edge TTS
 VOCES_DISPONIBLES = {
     "Español (España)": {
         "es-ES-JuanNeural": "Juan (España) - Masculino",
     choices = []
     for region, voices in VOCES_DISPONIBLES.items():
         for voice_id, voice_name in voices.items():
+            # Formato: (Texto a mostrar en el dropdown, Valor que se pasa)
+            choices.append((f"{voice_name} ({region})", voice_id))
     return choices
+# Obtener las voces al inicio del script
+# Usamos la lista predefinida por ahora para evitar el error de inicio con la API
+# Si deseas obtenerlas dinámicamente, descomenta la siguiente línea y comenta la que usa get_voice_choices()
+# AVAILABLE_VOICES = asyncio.run(get_available_voices())
+AVAILABLE_VOICES = get_voice_choices() # <-- Usamos la lista predefinida y aplanada
+# Establecer una voz por defecto inicial
+DEFAULT_VOICE_ID = "es-ES-JuanNeural" # ID de Juan
+# Buscar el nombre amigable para la voz por defecto si existe
+DEFAULT_VOICE_NAME = DEFAULT_VOICE_ID
+for text, voice_id in AVAILABLE_VOICES:
+    if voice_id == DEFAULT_VOICE_ID:
+        DEFAULT_VOICE_NAME = text
+        break
+# Si Juan no está en la lista (ej. lista de fallback), usar la primera voz disponible
+if DEFAULT_VOICE_ID not in [v[1] for v in AVAILABLE_VOICES]:
+    DEFAULT_VOICE_ID = AVAILABLE_VOICES[0][1] if AVAILABLE_VOICES else "en-US-AriaNeural"
+    DEFAULT_VOICE_NAME = AVAILABLE_VOICES[0][0] if AVAILABLE_VOICES else "Aria (United States) - Female" # Fallback name
+logger.info(f"Voz por defecto seleccionada (ID): {DEFAULT_VOICE_ID}")
 # Clave API de Pexels
 PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY")
 if not PEXELS_API_KEY:
         text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         cleaned_text = text.strip()
+        # Limpieza mejorada de la frase de instrucción
         try:
+            # Buscar el índice de inicio del prompt original dentro del texto generado
+            prompt_in_output_idx = text.lower().find(prompt.lower())
+            if prompt_in_output_idx != -1:
+                # Tomar todo el texto DESPUÉS del prompt original
+                cleaned_text = text[prompt_in_output_idx + len(prompt):].strip()
+                logger.debug("Texto limpiado tomando parte después del prompt original.")
             else:
+                 # Fallback si el prompt original no está exacto en la salida: buscar la frase de instrucción base
                  instruction_start_idx = text.find(instruction_phrase_start)
                  if instruction_start_idx != -1:
+                      # Tomar texto después de la frase base (puede incluir el prompt)
+                      cleaned_text = text[instruction_start_idx + len(instruction_phrase_start):].strip()
+                      logger.debug("Texto limpiado tomando parte después de la frase de instrucción base.")
+                 else:
+                      # Si ni la frase de instrucción ni el prompt se encuentran, usar el texto original
+                      logger.warning("No se pudo identificar el inicio del guión generado. Usando texto generado completo.")
+                      cleaned_text = text.strip() # Limpieza básica
         except Exception as e:
              logger.warning(f"Error durante la limpieza heurística del guión de IA: {e}. Usando texto generado sin limpieza adicional.")
+             cleaned_text = re.sub(r'<[^>]+>', '', text).strip() # Limpieza básica como fallback
+        # Asegurarse de que el texto resultante no sea solo la instrucción o vacío
+        if not cleaned_text or len(cleaned_text) < 10: # Umbral de longitud mínima
+             logger.warning("El guión generado parece muy corto o vacío después de la limpieza heurística. Usando el texto generado original (sin limpieza adicional).")
+             cleaned_text = re.sub(r'<[^>]+>', '', text).strip() # Fallback al texto original limpio
+        # Limpieza final de caracteres especiales y espacios sobrantes
         cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text).strip()
+        cleaned_text = cleaned_text.lstrip(':').strip() # Quitar posibles ':' al inicio
+        cleaned_text = cleaned_text.lstrip('.').strip() # Quitar posibles '.' al inicio
+        # Intentar obtener al menos una oración completa si es posible para un inicio más limpio
         sentences = cleaned_text.split('.')
         if sentences and sentences[0].strip():
             final_text = sentences[0].strip() + '.'
+            # Añadir la segunda oración si existe y es razonable
+            if len(sentences) > 1 and sentences[1].strip() and len(final_text.split()) < max_length * 0.7: # Usar un 70% de max_length como umbral
                  final_text += " " + sentences[1].strip() + "."
+                 final_text = final_text.replace("..", ".") # Limpiar doble punto
             logger.info(f"Guion generado final (Truncado a 100 chars): '{final_text[:100]}...'")
             return final_text.strip()
         logger.info(f"Guion generado final (sin oraciones completas detectadas - Truncado): '{cleaned_text[:100]}...'")
+        return cleaned_text.strip() # Si no se puede formar una oración, devolver el texto limpio tal cual
     except Exception as e:
         logger.error(f"Error generando guion con GPT-2 (fuera del bloque de limpieza): {str(e)}", exc_info=True)
         logger.warning("Usando prompt original como guion debido al error de generación.")
         return prompt.strip()
+# Función TTS ahora recibe la voz a usar
 async def text_to_speech(text, output_path, voice):
     logger.info(f"Convirtiendo texto a voz | Caracteres: {len(text)} | Voz: {voice} | Salida: {output_path}")
     if not text or not text.strip():
     logger.info(f"Palabras clave finales: {top_keywords}")
     return top_keywords
+# crear_video ahora recibe la voz seleccionada
 def crear_video(prompt_type, input_text, selected_voice, musica_file=None):
     logger.info("="*80)
     logger.info(f"INICIANDO CREACIÓN DE VIDEO | Tipo: {prompt_type}")
         logger.info(f"Directorio temporal intermedio creado: {temp_dir_intermediate}")
         temp_intermediate_files = []
+        # 2. Generar audio de voz usando la voz seleccionada, con reintentos si falla
         logger.info("Generando audio de voz...")
         voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
+        tts_voices_to_try = [selected_voice] # Intentar primero la voz seleccionada
+        # Añadir voces de respaldo si no están ya en la lista y son diferentes a la seleccionada
+        # Nos aseguramos de no añadir None o IDs vacíos a la lista de reintento
+        if "es-ES-JuanNeural" not in tts_voices_to_try and "es-ES-JuanNeural" is not None: tts_voices_to_try.append("es-ES-JuanNeural")
+        if "es-ES-ElviraNeural" not in tts_voices_to_try and "es-ES-ElviraNeural" is not None: tts_voices_to_try.append("es-ES-ElviraNeural")
+        # Si la lista de voces disponibles es fiable, podrías usar un subconjunto ordenado para reintentos más amplios
+        # Opcional: si AVAILABLE_VOICES es fiable, podrías usar un subconjunto ordenado para reintentos
+        # Ejemplo: for voice_id in [selected_voice] + sorted([v[1] for v in AVAILABLE_VOICES if v[1].startswith('es-') and v[1] != selected_voice]) + sorted([v[1] for v in AVAILABLE_VOICES if not v[1].startswith('es-') and v[1] != selected_voice]):
         tts_success = False
+        tried_voices = set() # Usar un set para rastrear voces intentadas de forma eficiente
+        for current_voice in tts_voices_to_try:
+            if not current_voice or current_voice in tried_voices: continue # Evitar intentar IDs None/vacíos o duplicados
+            tried_voices.add(current_voice)
+            logger.info(f"Intentando TTS con voz: {current_voice}...")
             try:
                 tts_success = asyncio.run(text_to_speech(guion, voz_path, voice=current_voice))
                 if tts_success:
+                    logger.info(f"TTS exitoso con voz '{current_voice}'.")
+                    break # Salir del bucle de reintento si tiene éxito
             except Exception as e:
+                 logger.warning(f"Fallo al generar TTS con voz '{current_voice}': {str(e)}", exc_info=True)
+                 pass # Continuar al siguiente intento
+        # Verificar si el archivo fue creado después de todos los intentos
         if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
+             logger.error("Fallo en la generación de voz después de todos los intentos. Archivo de audio no creado o es muy pequeño.")
              raise ValueError("Error generando voz a partir del guion (fallo de TTS).")
         temp_intermediate_files.append(voz_path)
             except Exception as e:
                 logger.warning(f"Error buscando videos para '{keyword}': {str(e)}")
         if len(videos_data) < total_desired_videos / 2:
             logger.warning(f"Pocos videos encontrados ({len(videos_data)}). Intentando con palabras clave genéricas.")
             generic_keywords = ["nature", "city", "background", "abstract"]
              logger.info(f"Directorio temporal intermedio {temp_dir_intermediate} persistirá para que Gradio lea el video final.")
+# run_app ahora recibe todos los inputs, incluyendo la voz seleccionada
 def run_app(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice): # <-- Recibe el valor del Dropdown
     logger.info("="*80)
     logger.info("SOLICITUD RECIBIDA EN INTERFAZ")
         return None, None, gr.update(value="⚠️ Por favor, ingresa texto para el guion o el tema.", interactive=False)
     # Validar la voz seleccionada. Si no es válida, usar la por defecto.
+    # AVAILABLE_VOICES se obtiene al inicio. Hay que buscar si el voice_id existe en la lista de pares (nombre, id)
+    voice_ids_disponibles = [v[1] for v in AVAILABLE_VOICES]
+    if selected_voice not in voice_ids_disponibles:
+        logger.warning(f"Voz seleccionada inválida o no encontrada en la lista: '{selected_voice}'. Usando voz por defecto: {DEFAULT_VOICE_ID}.")
+        selected_voice = DEFAULT_VOICE_ID # <-- Usar el ID de la voz por defecto
     else:
         logger.info(f"Voz seleccionada validada: {selected_voice}")
         logger.info(f"Archivo de música recibido: {musica_file}")
     else:
         logger.info("No se proporcionó archivo de música.")
+    logger.info(f"Voz final a usar (ID): {selected_voice}") # Loguear el ID de la voz final
     try:
         logger.info("Llamando a crear_video...")
+        # Pasar el input_text elegido, la voz seleccionada (el ID) y el archivo de música a crear_video
+        video_path = crear_video(prompt_type, input_text, selected_voice, musica_file) # <-- PASAR selected_voice (ID) a crear_video
         if video_path and os.path.exists(video_path):
             logger.info(f"crear_video retornó path: {video_path}")
             # --- COMPONENTE: Selección de Voz ---
             voice_dropdown = gr.Dropdown(
                 label="Seleccionar Voz para Guion",
+                choices=AVAILABLE_VOICES, # Usar la lista obtenida al inicio
+                value=DEFAULT_VOICE_ID,      # Usar el ID de la voz por defecto calculada
                 interactive=True
                  # visible=... <-- ¡NO DEBE ESTAR AQUÍ!
             )
             file_output = gr.File(
                 label="Descargar Archivo de Video",
                 interactive=False,
+                visible=False # <-- ESTÁ BIEN AQUÍ
                  # visible=... <-- ¡NO DEBE ESTAR AQUÍ si ya está visible=False arriba!
             )
             status_output = gr.Textbox(
         outputs=[video_output, file_output, status_output]
     ).then(
         # Acción 3 (síncrona): Hacer visible el enlace de descarga
         lambda video_path, file_path, status_msg: gr.update(visible=file_path is not None),
         inputs=[video_output, file_output, status_output],
         outputs=[file_output]
     )