Spaces:

thelip
/

playdiff

Runtime error

App Files Files Community

playdiff / app.py

thelip

Create app.py

43d7e05 verified 2 months ago

raw

history blame

2.53 kB

	import gradio as gr
	from playdiffusion import PlayDiffusion, RVCInput
	import os
	import wget

	# --- Model Downloading ---
	# Each key is the local path an asset is saved under; each value is the URL
	# it is fetched from when no path with that name exists yet.
	print("--- Checking and Downloading Model Assets ---")
	MODEL_FILES = {
	    "kmeans_10k.npy": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/kmeans_10k.npy",
	    "last_250k_fixed.pkl": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/last_250k_fixed.pkl",
	    "tokenizer-multi_bpe16384_merged_extended_58M.json": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/tokenizer-multi_bpe16384_merged_extended_58M.json",
	    "v090_g_01105000": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/v090_g_01105000",
	    "voice_encoder_1992000.pt": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/voice_encoder_1992000.pt",
	    "xlsr2_1b_v2_custom.pt": "https://huggingface.co/PlayHT/PlayDiffusion/resolve/main/xlsr2_1b_v2_custom.pt",
	}

	for filename, url in MODEL_FILES.items():
	    if os.path.exists(filename):
	        # A path with this name is already there; do not download again.
	        print(f"{filename} already exists. Skipping download.")
	        continue
	    print(f"Downloading {filename}...")
	    wget.download(url, filename)

	# --- Gradio App ---
	# One PlayDiffusion instance is created at import time and reused by
	# speech_rvc for every conversion request.
	print("Initializing PlayDiffusion... This will load the models into memory.")
	inpainter = PlayDiffusion()
	print("PlayDiffusion initialized successfully.")

	def speech_rvc(rvc_source_speech, rvc_target_voice):
	    """Run voice conversion on a source recording using a target voice.

	    Args:
	        rvc_source_speech: Value of the "Source Speech" audio input, or
	            None when nothing was provided.
	        rvc_target_voice: Value of the "Target Voice" audio input, or
	            None when nothing was provided.

	    Returns:
	        Whatever ``inpainter.rvc`` returns for the given pair of inputs.

	    Raises:
	        gr.Error: If either input is None.
	    """
	    missing_input = rvc_source_speech is None or rvc_target_voice is None
	    if missing_input:
	        raise gr.Error("Please provide both a source speech audio and a target voice audio.")

	    print("Starting voice conversion...")
	    request = RVCInput(
	        source_speech=rvc_source_speech,
	        target_voice=rvc_target_voice,
	    )
	    result = inpainter.rvc(request)
	    print("Voice conversion finished.")
	    return result

	# Page layout: heading, the two audio inputs side by side, a submit button,
	# and a read-only audio component that receives the result.
	with gr.Blocks(
	    theme=gr.themes.Soft(),
	    title="PlayDiffusion Voice Conversion",
	) as demo:
	    gr.Markdown("# 🗣️ PlayDiffusion Voice Conversion")
	    gr.Markdown("Upload a Source Speech audio and a Target Voice audio to convert the speech.")

	    # Both inputs accept an upload or a microphone recording and are
	    # configured with type="filepath".
	    with gr.Row():
	        rvc_source_speech = gr.Audio(
	            label="Source Speech",
	            sources=["upload", "microphone"],
	            type="filepath",
	        )
	        rvc_target_voice = gr.Audio(
	            label="Target Voice",
	            sources=["upload", "microphone"],
	            type="filepath",
	        )

	    rvc_submit = gr.Button("🚀 Run Voice Conversion", variant="primary")
	    gr.Markdown("### Converted Speech Output")
	    rvc_output = gr.Audio(label="Result", interactive=False)

	    # Clicking the button calls speech_rvc with both inputs and routes its
	    # return value to the result component.
	    rvc_submit.click(
	        fn=speech_rvc,
	        inputs=[rvc_source_speech, rvc_target_voice],
	        outputs=[rvc_output],
	    )

	demo.launch()