TTS-Talker

Running

Quang Long committed on Sep 25

Commit

52ff743

1 Parent(s): b8c51d6

update ui

Files changed (2) hide show

app.py CHANGED Viewed

@@ -281,6 +281,8 @@ def sadtalker_demo():
                     interactive=False,
                     value="",
                     visible=True,
                 )
             def enable_generate(audio, text, image):

                     interactive=False,
                     value="",
                     visible=True,
+                    lines=3,
+                    max_lines=4
                 )
             def enable_generate(audio, text, image):

app_tts.py CHANGED Viewed

@@ -1,4 +1,5 @@
-import os
 import hashlib
 import soundfile as sf
 import gradio as gr
@@ -10,6 +11,7 @@ from datetime import datetime
 from huggingface_hub import login
 from cached_path import cached_path
 # Import hàm infer gốc của f5_tts
 from f5_tts.infer.utils_infer import (
     preprocess_ref_audio_text,
@@ -162,6 +164,7 @@ def infer_tts(
         cache_path = get_audio_cache_path(text_chunk, ref_audio_orig, model)
         if os.path.exists(cache_path):
             wave, sample_rate = sf.read(cache_path)
         else:
             clean_chunk = normalize_for_tts(text_chunk)  # <- thêm dòng này
@@ -175,6 +178,7 @@ def infer_tts(
                 speed=speed,
                 nfe_step=16,  # giảm tải
             )
             sf.write(cache_path, wave, sample_rate)
         final_audio_segments.append(wave)

+import os, sys
 import hashlib
 import soundfile as sf
 import gradio as gr
 from huggingface_hub import login
 from cached_path import cached_path
+sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
 # Import hàm infer gốc của f5_tts
 from f5_tts.infer.utils_infer import (
     preprocess_ref_audio_text,
         cache_path = get_audio_cache_path(text_chunk, ref_audio_orig, model)
         if os.path.exists(cache_path):
+            print(f"Using cached audio: {cache_path}")
             wave, sample_rate = sf.read(cache_path)
         else:
             clean_chunk = normalize_for_tts(text_chunk)  # <- thêm dòng này
                 speed=speed,
                 nfe_step=16,  # giảm tải
             )
+            print(f"[CACHE] Saved new audio to: {cache_path}")
             sf.write(cache_path, wave, sample_rate)
         final_audio_segments.append(wave)