Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
-
|
|
|
|
| 4 |
import scipy.io.wavfile
|
| 5 |
import os
|
| 6 |
|
|
@@ -10,10 +11,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 10 |
# 模型 ID
|
| 11 |
model_id = "openbmb/VoxCPM-0.5B"
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
|
| 17 |
|
| 18 |
def generate_speech(text):
|
| 19 |
"""
|
|
@@ -22,19 +22,20 @@ def generate_speech(text):
|
|
| 22 |
if not text or text.strip() == "":
|
| 23 |
return None
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
-
output = model.generate(**inputs, cfg_value=2.0)
|
| 31 |
-
|
| 32 |
-
# 提取波形数据,它通常是输出的第一个元素
|
| 33 |
-
wav = output[0] if isinstance(output, tuple) else output
|
| 34 |
-
wav = wav.cpu().numpy()
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
|
|
|
| 38 |
|
| 39 |
# 将生成的波形保存为临时的 .wav 文件
|
| 40 |
output_filename = "output.wav"
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
+
# 使用 voxcpm 库中专属的类
|
| 4 |
+
from voxcpm import VoxCPM, VoxCPMFeatureExtractor
|
| 5 |
import scipy.io.wavfile
|
| 6 |
import os
|
| 7 |
|
|
|
|
| 11 |
# 模型 ID
|
| 12 |
model_id = "openbmb/VoxCPM-0.5B"
|
| 13 |
|
| 14 |
+
# 使用模型官方的 .from_pretrained 方法加载
|
| 15 |
+
model = VoxCPM.from_pretrained(model_id).to(device)
|
| 16 |
+
feature_extractor = VoxCPMFeatureExtractor.from_pretrained(model_id)
|
|
|
|
| 17 |
|
| 18 |
def generate_speech(text):
|
| 19 |
"""
|
|
|
|
| 22 |
if not text or text.strip() == "":
|
| 23 |
return None
|
| 24 |
|
| 25 |
+
# 使用模型自带的 generate 方法生成语音波形
|
| 26 |
+
wav = model.generate(
|
| 27 |
+
text=text,
|
| 28 |
+
prompt_wav_path=None,
|
| 29 |
+
prompt_text=None,
|
| 30 |
+
cfg_value=2.0
|
| 31 |
+
)
|
| 32 |
|
| 33 |
+
# 从 feature_extractor 获取采样率
|
| 34 |
+
sampling_rate = feature_extractor.sampling_rate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
# 确保波形数据是 CPU 上的 numpy 数组
|
| 37 |
+
if isinstance(wav, torch.Tensor):
|
| 38 |
+
wav = wav.cpu().numpy()
|
| 39 |
|
| 40 |
# 将生成的波形保存为临时的 .wav 文件
|
| 41 |
output_filename = "output.wav"
|