|
|
|
|
|
import gradio as gr |
|
|
from playdiffusion import PlayDiffusion, RVCInput |
|
|
import os |
|
|
import wget |
|
|
import torch |
|
|
|
|
|
|
|
|
print("--- Checking and Downloading Model Assets ---")

# Maps each asset's local filename (relative to the working directory) to the
# URL it is fetched from when the file is not already present.
MODEL_FILES = {
    "kmeans_10k.npy": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/kmeans_10k.npy",
    "last_250k_fixed.pkl": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/last_250k_fixed.pkl",
    "tokenizer-multi_bpe16384_merged_extended_58M.json": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/tokenizer-multi_bpe16384_merged_extended_58M.json",
    "v090_g_01105000": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/v090_g_01105000",
    "voice_encoder_1992000.pt": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/voice_encoder_1992000.pt",
    "xlsr2_1b_v2_custom.pt": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/xlsr2_1b_v2_custom.pt"
}

for filename, url in MODEL_FILES.items():
    # Anything already on disk is reused as-is; only missing files are fetched.
    if os.path.exists(filename):
        print(f"{filename} already exists. Skipping download.")
        continue

    print(f"Downloading {filename}...")
    wget.download(url, filename)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use CUDA when torch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"--- Device selected: {device.upper()} ---")
print("Initializing PlayDiffusion... This will load the models into memory.")

# Single module-level PlayDiffusion instance; speech_rvc() calls its rvc()
# method for every conversion request.
inpainter = PlayDiffusion(device=device)

print("PlayDiffusion initialized successfully.")
|
|
|
|
|
|
|
|
|
|
|
def speech_rvc(rvc_source_speech, rvc_target_voice):
    """Run PlayDiffusion voice conversion on a source/target audio pair.

    Args:
        rvc_source_speech: Source speech audio, or ``None`` if not provided.
            The UI in this file supplies it from a ``gr.Audio`` component
            configured with ``type="filepath"``.
        rvc_target_voice: Target voice audio, or ``None`` if not provided.
            Supplied the same way as ``rvc_source_speech``.

    Returns:
        The value returned by ``inpainter.rvc`` for the request.

    Raises:
        gr.Error: If either input is ``None``.
    """
    both_provided = rvc_source_speech is not None and rvc_target_voice is not None
    if not both_provided:
        raise gr.Error("Please provide both a source speech audio and a target voice audio.")

    print("Starting voice conversion...")
    request = RVCInput(source_speech=rvc_source_speech, target_voice=rvc_target_voice)
    converted_audio = inpainter.rvc(request)
    print("Voice conversion finished.")

    return converted_audio
|
|
|
|
|
|
|
|
# Build the Gradio page: a heading, two audio inputs side by side, a submit
# button, and an audio output wired to speech_rvc().
with gr.Blocks(theme=gr.themes.Soft(), title="PlayDiffusion Voice Conversion") as demo:
    # NOTE(review): the leading characters of this heading (and of the button
    # label below) look like a mis-decoded emoji -- confirm against the
    # original file before changing them.
    gr.Markdown("# π£οΈ PlayDiffusion Voice Conversion")
    gr.Markdown(f"### Running on: **{device.upper()}**")
    gr.Markdown("Upload a **Source Speech** audio and a **Target Voice** audio to convert the speech.")

    if device == 'cpu':
        # gr.Warning() only surfaces in the browser when it is called from
        # inside an event handler. Called here, while the layout is being
        # built, it never reaches the page, so the notice is rendered as a
        # regular Markdown component instead.
        gr.Markdown(
            "**Warning:** Running on CPU. The voice conversion process will be "
            "extremely slow and may time out."
        )

    with gr.Row():
        rvc_source_speech = gr.Audio(label="Source Speech", sources=["upload", "microphone"], type="filepath")
        rvc_target_voice = gr.Audio(label="Target Voice", sources=["upload", "microphone"], type="filepath")

    rvc_submit = gr.Button("π Run Voice Conversion", variant="primary")

    gr.Markdown("### Converted Speech Output")
    rvc_output = gr.Audio(label="Result", interactive=False)

    # Clicking the button passes both audio inputs to speech_rvc() and shows
    # its return value in the result player.
    rvc_submit.click(fn=speech_rvc, inputs=[rvc_source_speech, rvc_target_voice], outputs=[rvc_output])

# Start the web server for the app defined above.
demo.launch()