Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import torch
|
| 2 |
from threading import Thread
|
| 3 |
|
|
@@ -43,35 +44,28 @@ Bark can generate highly realistic, multilingual speech as well as other audio -
|
|
| 43 |
In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
|
| 44 |
"""
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
bark = BarkModel.from_pretrained(HUB_PATH)
|
| 49 |
-
else:
|
| 50 |
-
bark = BarkModel.from_pretrained(HUB_PATH).to(device)
|
| 51 |
-
bark = bark.to_bettertransformer()
|
| 52 |
|
| 53 |
|
| 54 |
-
# Inference
|
| 55 |
-
|
|
|
|
| 56 |
if voice_preset not in speaker_embeddings:
|
| 57 |
voice_preset = None
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
text,
|
| 61 |
-
]
|
| 62 |
-
inputs = processor(sentences, voice_preset=voice_preset).to(device)
|
| 63 |
# Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
|
| 64 |
|
| 65 |
fine_output = bark.generate(
|
| 66 |
**inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
|
| 67 |
)
|
| 68 |
-
|
| 69 |
print("Fine tokens generated")
|
| 70 |
-
|
| 71 |
with torch.no_grad():
|
| 72 |
features = vocos.codes_to_features(fine_output.transpose(0,1))
|
| 73 |
vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
| 74 |
-
|
| 75 |
return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
|
| 76 |
|
| 77 |
|
|
|
|
| 1 |
+
import spaces
|
| 2 |
import torch
|
| 3 |
from threading import Thread
|
| 4 |
|
|
|
|
| 44 |
In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
|
| 45 |
"""
|
| 46 |
|
| 47 |
+
bark = BarkModel.from_pretrained(HUB_PATH).to(device)
|
| 48 |
+
bark = bark.to_bettertransformer()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
+
# Inference on Zero GPU
@spaces.GPU
def generate_audio(text, voice_preset=None, lag=0):
    """Turn *text* into speech: Bark generates audio codes, Vocos decodes them.

    Parameters
    ----------
    text : str
        Sentence to synthesize.
    voice_preset : str | None, optional
        Key into the module-level ``speaker_embeddings`` table; any value not
        present there falls back to ``None`` (Bark's default voice).
    lag : int, optional
        Unused in this body; kept so the caller-facing signature is unchanged.

    Returns
    -------
    tuple
        ``(SAMPLE_RATE, waveform)`` where ``waveform`` is a squeezed numpy
        array on the CPU, ready for a Gradio audio component.
    """
    # Silently fall back to the default voice for unknown presets.
    if voice_preset not in speaker_embeddings:
        voice_preset = None

    batch = processor([text], voice_preset=voice_preset).to(device)

    # Sample Bark's fine-codebook tokens for the prompt.
    fine_output = bark.generate(
        **batch, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
    )
    print("Fine tokens generated")

    # Vocode the discrete codes into a waveform; no gradients needed at inference.
    with torch.no_grad():
        features = vocos.codes_to_features(fine_output.transpose(0, 1))
        vocos_waveform = vocos.decode(
            features, bandwidth_id=torch.tensor([2], device=device)
        )

    return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())