Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import torch
|
| 2 |
from threading import Thread
|
| 3 |
|
|
@@ -43,35 +44,28 @@ Bark can generate highly realistic, multilingual speech as well as other audio -
|
|
| 43 |
In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
|
| 44 |
"""
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
bark = BarkModel.from_pretrained(HUB_PATH)
|
| 49 |
-
else:
|
| 50 |
-
bark = BarkModel.from_pretrained(HUB_PATH).to(device)
|
| 51 |
-
bark = bark.to_bettertransformer()
|
| 52 |
|
| 53 |
|
| 54 |
-
# Inference
|
| 55 |
-
|
|
|
|
| 56 |
if voice_preset not in speaker_embeddings:
|
| 57 |
voice_preset = None
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
text,
|
| 61 |
-
]
|
| 62 |
-
inputs = processor(sentences, voice_preset=voice_preset).to(device)
|
| 63 |
# Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
|
| 64 |
|
| 65 |
fine_output = bark.generate(
|
| 66 |
**inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
|
| 67 |
)
|
| 68 |
-
|
| 69 |
print("Fine tokens generated")
|
| 70 |
-
|
| 71 |
with torch.no_grad():
|
| 72 |
features = vocos.codes_to_features(fine_output.transpose(0,1))
|
| 73 |
vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
| 74 |
-
|
| 75 |
return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
|
| 76 |
|
| 77 |
|
|
|
|
| 1 |
+
import spaces
|
| 2 |
import torch
|
| 3 |
from threading import Thread
|
| 4 |
|
|
|
|
| 44 |
In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
|
| 45 |
"""
|
| 46 |
|
| 47 |
+
bark = BarkModel.from_pretrained(HUB_PATH).to(device)
|
| 48 |
+
bark = bark.to_bettertransformer()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
+
# Inference on Zero GPU
@spaces.GPU
def generate_audio(text, voice_preset=None, lag=0):
    """Turn *text* into speech: Bark generates audio codes, Vocos decodes them.

    Parameters
    ----------
    text : str
        Sentence to synthesize.
    voice_preset : str | None, optional
        Key into the module-level ``speaker_embeddings`` table; any value not
        present there falls back to ``None`` (Bark's default voice).
    lag : int, optional
        Unused in this body; kept so the caller-facing signature is unchanged.

    Returns
    -------
    tuple
        ``(SAMPLE_RATE, waveform)`` where ``waveform`` is a squeezed numpy
        array on the CPU, ready for a Gradio audio component.
    """
    # Silently fall back to the default voice for unknown presets.
    if voice_preset not in speaker_embeddings:
        voice_preset = None

    batch = processor([text], voice_preset=voice_preset).to(device)

    # Sample Bark's fine-codebook tokens for the prompt.
    fine_output = bark.generate(
        **batch, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
    )
    print("Fine tokens generated")

    # Vocode the discrete codes into a waveform; no gradients needed at inference.
    with torch.no_grad():
        features = vocos.codes_to_features(fine_output.transpose(0, 1))
        vocos_waveform = vocos.decode(
            features, bandwidth_id=torch.tensor([2], device=device)
        )

    return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())