# NOTE: the three lines below were scrape residue ("Spaces:" / "Build error")
# from the hosting page, not part of the module; kept here commented out.
# Spaces: Build error / Build error
import os
import uuid
import numpy as np
import torch
import soundfile as sf
from gtts import gTTS
import edge_tts
from inference import Inference
import asyncio

# git+https://github.com/suno-ai/bark.git
# from transformers import AutoProcessor, BarkModel
# import nltk
# from nltk.tokenize import sent_tokenize
# from bark import SAMPLE_RATE
# now_dir = os.getcwd()
def cast_to_device(tensor, device):
    """Best-effort move of *tensor* onto *device*.

    If ``tensor.to(device)`` raises for any reason (unknown device string,
    object without a ``.to`` method, ...), the error is printed and the
    original object is returned unchanged.
    """
    try:
        moved = tensor.to(device)
    except Exception as err:
        # Swallow the failure on purpose: callers prefer an unmoved tensor
        # over a crash here.
        print(err)
        return tensor
    return moved
# TODO: find a way to avoid re-downloading the ~4 GB Bark model every time an instance is created
# def _bark_conversion_(text, voice_preset):
#     os.makedirs(os.path.join(now_dir, "tts"), exist_ok=True)
#     device = "cuda:0" if torch.cuda.is_available() else "cpu"
#     dtype = torch.float32 if "cpu" in device else torch.float16
#     bark_processor = AutoProcessor.from_pretrained(
#         "suno/bark",
#         cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
#         torch_dtype=dtype,
#     )
#     bark_model = BarkModel.from_pretrained(
#         "suno/bark",
#         cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
#         torch_dtype=dtype,
#     ).to(device)
#     # bark_model.enable_cpu_offload()
#     inputs = bark_processor(text=[text], return_tensors="pt", voice_preset=voice_preset)
#     tensor_dict = {
#         k: cast_to_device(v, device) if hasattr(v, "to") else v
#         for k, v in inputs.items()
#     }
#     speech_values = bark_model.generate(**tensor_dict, do_sample=True)
#     sampling_rate = bark_model.generation_config.sample_rate
#     speech = speech_values.cpu().numpy().squeeze()
#     return speech, sampling_rate
def tts_infer(tts_text, model_url, tts_method, tts_model):
    """Synthesize *tts_text* with a TTS engine, then voice-convert the result.

    Parameters
    ----------
    tts_text : str
        Text to synthesize; truncated to 60 characters (demo limit).
    model_url : str
        URL/name of the RVC model passed to ``Inference``.
    tts_method : str
        TTS backend selector; only ``"Edge-tts"`` is currently handled
        (the Bark path is commented out elsewhere in this file).
    tts_model : str
        Edge-tts voice id plus a trailing "-<Gender>" suffix; its first two
        characters are used as the language code for the gTTS fallback.

    Returns
    -------
    tuple
        ``(inference_output, converted_file_path)`` on success,
        ``(error_message_str, None)`` on validation or conversion failure.
    """
    print("*****************")
    print(tts_text)
    print(model_url)

    # Validate inputs first; these strings are user-facing (Spanish UI).
    if not tts_text:
        return 'Primero escribe el texto que quieres convertir.', None
    if not tts_model:
        return 'Selecciona un modelo TTS antes de convertir.', None
    if not model_url:
        return 'Escribe la url de modelo que quieres usar antes de convertir.', None

    f0_method = "harvest"
    output_folder = "audios"
    os.makedirs(output_folder, exist_ok=True)
    # Unique file name so concurrent requests do not clobber each other.
    converted_tts_filename = os.path.join(output_folder, f"tts_out_{uuid.uuid4()}.wav")
    success = False

    if len(tts_text) > 60:
        tts_text = tts_text[:60]
        print("DEMO; limit to 60 characters")

    # e.g. "es-ES-ElviraNeural-Female" -> "es", used by the gTTS fallback.
    language = tts_model[:2]

    if tts_method == "Edge-tts":
        try:
            # The edge-tts voice id is the model string minus its trailing
            # "-<Gender>" component.
            asyncio.run(
                edge_tts.Communicate(
                    tts_text, "-".join(tts_model.split("-")[:-1])
                ).save(converted_tts_filename)
            )
            success = True
        except Exception as e:
            print("ERROR", e)
            try:
                # First fallback: plain gTTS in the voice's language.
                tts = gTTS(tts_text, lang=language)
                tts.save(converted_tts_filename)
                print(
                    f"No audio was received. Please change the tts voice for {tts_model}. USING gTTS."
                )
                success = True
            except Exception:  # was a bare `except:`; keep Ctrl-C raisable
                # Last resort: write a placeholder clip so a file exists,
                # but report the failure to the caller (success stays False).
                tts = gTTS("a", lang=language)
                tts.save(converted_tts_filename)
                print("Error: Audio will be replaced.")
                success = False

    if success:
        inference = Inference(
            model_name=model_url,
            f0_method=f0_method,
            source_audio_path=converted_tts_filename,
            output_file_name=os.path.join(
                "./audio-outputs", os.path.basename(converted_tts_filename)
            ),
        )
        output = inference.run()
        # Clean up the intermediate TTS wav and the downloaded model weights.
        if os.path.exists(converted_tts_filename):
            os.remove(converted_tts_filename)
        weights_path = os.path.join("weights", inference.model_name)
        if os.path.exists(weights_path):
            os.remove(weights_path)
        # NOTE(review): assumes Inference.run() returns a dict-like mapping;
        # `'success' in output` on a plain string would be a substring test —
        # confirm the return type of Inference.run().
        if 'success' in output and output['success']:
            return output, output['file']
        return output, None
    return "Ocurrió un error durante la conversión", None