# Source: playdiff / app.py (Hugging Face Space, uploaded by thelip)
# Commit 0aaab48 (verified): "Update app.py"
# app.py
import gradio as gr
from playdiffusion import PlayDiffusion, RVCInput
import os
import wget
import torch # Import torch to check for CUDA availability
# --- Model Downloading ---
print("--- Checking and Downloading Model Assets ---")

# Maps each local file name to the URL it is fetched from when missing.
MODEL_FILES = {
    "kmeans_10k.npy": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/kmeans_10k.npy",
    "last_250k_fixed.pkl": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/last_250k_fixed.pkl",
    "tokenizer-multi_bpe16384_merged_extended_58M.json": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/tokenizer-multi_bpe16384_merged_extended_58M.json",
    "v090_g_01105000": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/v090_g_01105000",
    "voice_encoder_1992000.pt": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/voice_encoder_1992000.pt",
    "xlsr2_1b_v2_custom.pt": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/xlsr2_1b_v2_custom.pt",
}

# Fetch only the assets that are not already present in the working directory.
for filename, url in MODEL_FILES.items():
    if os.path.exists(filename):
        print(f"{filename} already exists. Skipping download.")
        continue
    print(f"Downloading {filename}...")
    wget.download(url, filename)
# --- Gradio App ---
# Device selection: use CUDA when torch reports it as available, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"--- Device selected: {device.upper()} ---")

print("Initializing PlayDiffusion... This will load the models into memory.")
# The chosen device string is handed straight to the PlayDiffusion constructor.
inpainter = PlayDiffusion(device=device)
print("PlayDiffusion initialized successfully.")
# ----------------------------------------------------
def speech_rvc(rvc_source_speech, rvc_target_voice):
    """Run voice conversion on the source speech using the target voice.

    Args:
        rvc_source_speech: Source-speech audio value from the UI, or ``None``
            when nothing was provided.
        rvc_target_voice: Target-voice audio value from the UI, or ``None``
            when nothing was provided.

    Returns:
        The value returned by ``inpainter.rvc``, passed through unchanged.

    Raises:
        gr.Error: If either input is ``None``.
    """
    have_both_inputs = rvc_source_speech is not None and rvc_target_voice is not None
    if not have_both_inputs:
        raise gr.Error("Please provide both a source speech audio and a target voice audio.")

    print("Starting voice conversion...")
    request = RVCInput(source_speech=rvc_source_speech, target_voice=rvc_target_voice)
    result = inpainter.rvc(request)
    print("Voice conversion finished.")
    return result
# --- Gradio UI ---
# Page layout: header and status text, two audio inputs side by side, a submit
# button, and a read-only player for the converted audio.
with gr.Blocks(theme=gr.themes.Soft(), title="PlayDiffusion Voice Conversion") as demo:
    gr.Markdown("# 🗣️ PlayDiffusion Voice Conversion")
    gr.Markdown(f"### Running on: **{device.upper()}**")
    gr.Markdown("Upload a **Source Speech** audio and a **Target Voice** audio to convert the speech.")
    if device == 'cpu':
        # gr.Warning is an alert meant to be raised from inside an event
        # handler; called here while the layout is being built it never
        # reaches the browser. Render the notice as page content instead so
        # visitors actually see it.
        gr.Markdown(
            "⚠️ **Warning:** Running on CPU. The voice conversion process "
            "will be extremely slow and may time out."
        )
    with gr.Row():
        rvc_source_speech = gr.Audio(label="Source Speech", sources=["upload", "microphone"], type="filepath")
        rvc_target_voice = gr.Audio(label="Target Voice", sources=["upload", "microphone"], type="filepath")
    rvc_submit = gr.Button("🚀 Run Voice Conversion", variant="primary")
    gr.Markdown("### Converted Speech Output")
    rvc_output = gr.Audio(label="Result", interactive=False)
    # Clicking the button sends both audio inputs to speech_rvc and shows the
    # returned audio in the result player.
    rvc_submit.click(fn=speech_rvc, inputs=[rvc_source_speech, rvc_target_voice], outputs=[rvc_output])

demo.launch()