Spaces:
Build error
Build error
xJuuzouYTx
committed on
Commit
·
6f1ebe2
1
Parent(s):
1b5ebf2
[ADD] elevenlabs tts
Browse files
- app.py +20 -12
- requirements.txt +1 -1
- tts/constants.py +1 -1
- tts/conversion.py +26 -7
app.py
CHANGED
|
@@ -7,7 +7,7 @@ from utils.model import model_downloader, get_model
|
|
| 7 |
import requests
|
| 8 |
import json
|
| 9 |
from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
|
| 10 |
-
from tts.conversion import tts_infer
|
| 11 |
|
| 12 |
api_url = "https://rvc-models-api.onrender.com/uploadfile/"
|
| 13 |
|
|
@@ -130,9 +130,11 @@ def search_model(name):
|
|
| 130 |
|
| 131 |
def update_tts_methods_voice(select_value):
|
| 132 |
if select_value == "Edge-tts":
|
| 133 |
-
return gr.update(choices=EDGE_VOICES)
|
| 134 |
elif select_value == "Bark-tts":
|
| 135 |
-
return gr.update(choices=BARK_VOICES)
|
|
|
|
|
|
|
| 136 |
|
| 137 |
with gr.Blocks() as app:
|
| 138 |
gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
|
|
@@ -163,19 +165,26 @@ with gr.Blocks() as app:
|
|
| 163 |
with gr.Row():
|
| 164 |
tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
|
| 165 |
|
| 166 |
-
with gr.
|
| 167 |
-
tts_method = gr.Dropdown(choices=VOICE_METHODS, value="
|
| 168 |
-
tts_model = gr.Dropdown(choices=
|
| 169 |
-
|
| 170 |
-
|
|
|
|
|
|
|
| 171 |
with gr.Row():
|
| 172 |
tts_vc_output1 = gr.Textbox(label="Salida")
|
| 173 |
tts_vc_output2 = gr.Audio(label="Audio de salida")
|
| 174 |
|
| 175 |
-
tts_btn =
|
| 176 |
-
tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model], outputs=[tts_vc_output1, tts_vc_output2])
|
| 177 |
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
gr.HTML("<h4>Buscar modelos</h4>")
|
| 180 |
search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
|
| 181 |
# Salida
|
|
@@ -198,7 +207,6 @@ with gr.Blocks() as app:
|
|
| 198 |
btn_post_model = gr.Button(value="Publicar")
|
| 199 |
btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
|
| 200 |
|
| 201 |
-
|
| 202 |
# with gr.Column():
|
| 203 |
# model_voice_path07 = gr.Dropdown(
|
| 204 |
# label=i18n("RVC Model:"),
|
|
|
|
| 7 |
import requests
|
| 8 |
import json
|
| 9 |
from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
|
| 10 |
+
from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES
|
| 11 |
|
| 12 |
api_url = "https://rvc-models-api.onrender.com/uploadfile/"
|
| 13 |
|
|
|
|
| 130 |
|
| 131 |
def update_tts_methods_voice(select_value):
|
| 132 |
if select_value == "Edge-tts":
|
| 133 |
+
return gr.update(choices=EDGE_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
|
| 134 |
elif select_value == "Bark-tts":
|
| 135 |
+
return gr.update(choices=BARK_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
|
| 136 |
+
elif select_value == 'ElevenLabs':
|
| 137 |
+
return gr.update(choices=ELEVENLABS_VOICES_NAMES), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True)
|
| 138 |
|
| 139 |
with gr.Blocks() as app:
|
| 140 |
gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
|
|
|
|
| 165 |
with gr.Row():
|
| 166 |
tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
|
| 167 |
|
| 168 |
+
with gr.Row():
|
| 169 |
+
tts_method = gr.Dropdown(choices=VOICE_METHODS, value="ElevenLabs", label="Método TTS:", visible=True)
|
| 170 |
+
tts_model = gr.Dropdown(choices=ELEVENLABS_VOICES_NAMES, label="Modelo TTS:", visible=True, interactive=True)
|
| 171 |
+
tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True)
|
| 172 |
+
|
| 173 |
+
tts_btn = gr.Button(value="Convertir")
|
| 174 |
+
|
| 175 |
with gr.Row():
|
| 176 |
tts_vc_output1 = gr.Textbox(label="Salida")
|
| 177 |
tts_vc_output2 = gr.Audio(label="Audio de salida")
|
| 178 |
|
| 179 |
+
tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key], outputs=[tts_vc_output1, tts_vc_output2])
|
|
|
|
| 180 |
|
| 181 |
+
tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
|
| 182 |
+

|
| 183 |
+
""", visible=True)
|
| 184 |
+
|
| 185 |
+
tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key])
|
| 186 |
+
|
| 187 |
+
with gr.Tab("Modelos"):
|
| 188 |
gr.HTML("<h4>Buscar modelos</h4>")
|
| 189 |
search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
|
| 190 |
# Salida
|
|
|
|
| 207 |
btn_post_model = gr.Button(value="Publicar")
|
| 208 |
btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
|
| 209 |
|
|
|
|
| 210 |
# with gr.Column():
|
| 211 |
# model_voice_path07 = gr.Dropdown(
|
| 212 |
# label=i18n("RVC Model:"),
|
requirements.txt
CHANGED
|
@@ -27,7 +27,7 @@ Cython==0.29.30
|
|
| 27 |
decorator==5.1.1
|
| 28 |
discord.py==2.3.2
|
| 29 |
edge-tts==6.1.5
|
| 30 |
-
elevenlabs
|
| 31 |
entrypoints==0.4
|
| 32 |
exceptiongroup==1.1.3
|
| 33 |
executing==1.2.0
|
|
|
|
| 27 |
decorator==5.1.1
|
| 28 |
discord.py==2.3.2
|
| 29 |
edge-tts==6.1.5
|
| 30 |
+
elevenlabs
|
| 31 |
entrypoints==0.4
|
| 32 |
exceptiongroup==1.1.3
|
| 33 |
executing==1.2.0
|
tts/constants.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
VOICE_METHODS = ["Edge-tts", "
|
| 2 |
|
| 3 |
BARK_VOICES = [
|
| 4 |
"v2/en_speaker_0-Male",
|
|
|
|
| 1 |
+
VOICE_METHODS = ["Edge-tts", "ElevenLabs",]
|
| 2 |
|
| 3 |
BARK_VOICES = [
|
| 4 |
"v2/en_speaker_0-Male",
|
tts/conversion.py
CHANGED
|
@@ -7,6 +7,18 @@ from gtts import gTTS
|
|
| 7 |
import edge_tts
|
| 8 |
from inference import Inference
|
| 9 |
import asyncio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
#git+https://github.com/suno-ai/bark.git
|
| 11 |
# from transformers import AutoProcessor, BarkModel
|
| 12 |
# import nltk
|
|
@@ -50,16 +62,11 @@ def cast_to_device(tensor, device):
|
|
| 50 |
# return speech, sampling_rate
|
| 51 |
|
| 52 |
|
| 53 |
-
def tts_infer(tts_text, model_url, tts_method, tts_model):
|
| 54 |
-
print("*****************")
|
| 55 |
-
print(tts_text)
|
| 56 |
-
print(model_url)
|
| 57 |
if not tts_text:
|
| 58 |
return 'Primero escribe el texto que quieres convertir.', None
|
| 59 |
if not tts_model:
|
| 60 |
return 'Selecciona un modelo TTS antes de convertir.', None
|
| 61 |
-
if not model_url:
|
| 62 |
-
return 'Escribe la url de modelo que quieres usar antes de convertir.', None
|
| 63 |
|
| 64 |
f0_method = "harvest"
|
| 65 |
output_folder = "audios"
|
|
@@ -94,7 +101,19 @@ def tts_infer(tts_text, model_url, tts_method, tts_model):
|
|
| 94 |
tts.save(converted_tts_filename)
|
| 95 |
print("Error: Audio will be replaced.")
|
| 96 |
success = False
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
# elif tts_method == "Bark-tts":
|
| 99 |
# try:
|
| 100 |
# script = tts_text.replace("\n", " ").strip()
|
|
|
|
| 7 |
import edge_tts
|
| 8 |
from inference import Inference
|
| 9 |
import asyncio
|
| 10 |
+
from elevenlabs import voices, generate, save
|
| 11 |
+
|
| 12 |
+
ELEVENLABS_VOICES_RAW = voices()
|
| 13 |
+
|
| 14 |
+
def get_elevenlabs_voice_names():
|
| 15 |
+
elevenlabs_voice_names = []
|
| 16 |
+
for voice in ELEVENLABS_VOICES_RAW:
|
| 17 |
+
elevenlabs_voice_names.append(voice.name)
|
| 18 |
+
return elevenlabs_voice_names
|
| 19 |
+
|
| 20 |
+
ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
|
| 21 |
+
|
| 22 |
#git+https://github.com/suno-ai/bark.git
|
| 23 |
# from transformers import AutoProcessor, BarkModel
|
| 24 |
# import nltk
|
|
|
|
| 62 |
# return speech, sampling_rate
|
| 63 |
|
| 64 |
|
| 65 |
+
def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
|
|
|
|
|
|
|
|
|
|
| 66 |
if not tts_text:
|
| 67 |
return 'Primero escribe el texto que quieres convertir.', None
|
| 68 |
if not tts_model:
|
| 69 |
return 'Selecciona un modelo TTS antes de convertir.', None
|
|
|
|
|
|
|
| 70 |
|
| 71 |
f0_method = "harvest"
|
| 72 |
output_folder = "audios"
|
|
|
|
| 101 |
tts.save(converted_tts_filename)
|
| 102 |
print("Error: Audio will be replaced.")
|
| 103 |
success = False
|
| 104 |
+
if tts_method == 'ElevenLabs':
|
| 105 |
+
audio = generate(
|
| 106 |
+
text=tts_text,
|
| 107 |
+
voice=tts_model,
|
| 108 |
+
model="eleven_multilingual_v2",
|
| 109 |
+
api_key=tts_api_key
|
| 110 |
+
)
|
| 111 |
+
save(audio=audio, filename=converted_tts_filename)
|
| 112 |
+
success = True
|
| 113 |
+
|
| 114 |
+
if not model_url:
|
| 115 |
+
return 'Pon la url del modelo si quieres aplicarle otro tono.', converted_tts_filename
|
| 116 |
+
|
| 117 |
# elif tts_method == "Bark-tts":
|
| 118 |
# try:
|
| 119 |
# script = tts_text.replace("\n", " ").strip()
|