danhtran2mind committed on
Commit
1efd737
·
verified ·
1 Parent(s): 7a64576

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -5
app.py CHANGED
@@ -4,6 +4,27 @@ import soundfile as sf
4
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
  from datasets import load_dataset
6
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  # Load the processor, model, and vocoder
9
  processor = SpeechT5Processor.from_pretrained("danhtran2mind/Viet-SpeechT5-TTS-finetuning")
@@ -15,9 +36,7 @@ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validat
15
 
16
  def generate_speech(text, voice):
17
  # Select speaker embedding based on voice choice
18
- speaker_dict = {"male": 2000,
19
- "female": 7000}
20
-
21
  speaker_id = speaker_dict[voice.lower()]
22
  speaker_embedding = torch.tensor(embeddings_dataset[speaker_id]["xvector"]).unsqueeze(0)
23
 
@@ -38,6 +57,9 @@ def generate_speech(text, voice):
38
  sf.write(output_path, speech.numpy(), samplerate=16000)
39
  return output_path
40
 
 
 
 
41
  # Create Gradio interface
42
  iface = gr.Interface(
43
  fn=generate_speech,
@@ -47,9 +69,10 @@ iface = gr.Interface(
47
  ],
48
  outputs=gr.Audio(label="Generated Speech"),
49
  title="Vietnamese Text-to-Speech with SpeechT5",
50
- description="Enter Vietnamese text and select a voice (Male or Female) to generate speech."
 
51
  )
52
 
53
  # Launch the app
54
  if __name__ == "__main__":
55
- iface.launch(debug=True)
 
4
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
  from datasets import load_dataset
6
  import numpy as np
7
+ import json
8
+ import os
9
+
10
+ # Directory containing config files
11
+ CONFIG_DIR = "assets/Viet-SpeechT5-TTS-finetuning"
12
+
13
+ # Load all config.json files from the directory
14
+ def load_configs(directory):
15
+ examples = []
16
+ for root, _, files in os.walk(directory):
17
+ for file in files:
18
+ if file == "config.json":
19
+ file_path = os.path.join(root, file)
20
+ try:
21
+ with open(file_path, 'r', encoding='utf-8') as f:
22
+ config = json.load(f)
23
+ if "input_text" in config and "voice" in config:
24
+ examples.append([config["input_text"], config["voice"]])
25
+ except Exception as e:
26
+ print(f"Error reading {file_path}: {e}")
27
+ return examples
28
 
29
  # Load the processor, model, and vocoder
30
  processor = SpeechT5Processor.from_pretrained("danhtran2mind/Viet-SpeechT5-TTS-finetuning")
 
36
 
37
  def generate_speech(text, voice):
38
  # Select speaker embedding based on voice choice
39
+ speaker_dict = {"male": 2000, "female": 7000}
 
 
40
  speaker_id = speaker_dict[voice.lower()]
41
  speaker_embedding = torch.tensor(embeddings_dataset[speaker_id]["xvector"]).unsqueeze(0)
42
 
 
57
  sf.write(output_path, speech.numpy(), samplerate=16000)
58
  return output_path
59
 
60
+ # Load examples from config files
61
+ examples = load_configs(CONFIG_DIR)
62
+
63
  # Create Gradio interface
64
  iface = gr.Interface(
65
  fn=generate_speech,
 
69
  ],
70
  outputs=gr.Audio(label="Generated Speech"),
71
  title="Vietnamese Text-to-Speech with SpeechT5",
72
+ description="Enter Vietnamese text and select a voice (Male or Female) to generate speech.",
73
+ examples=examples
74
  )
75
 
76
  # Launch the app
77
  if __name__ == "__main__":
78
+ iface.launch()