Spaces:

AIdsadong
/

my-voxcpm-app

Sleeping

App Files Community

AIdsadong committed on Sep 20

Commit

67e7ff5

verified ·

1 Parent(s): 1baa65c

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -22

app.py CHANGED Viewed

@@ -1,40 +1,40 @@
 import gradio as gr
 import torch
-from modeling_voxcpm import VoxCPM
-from feature_extraction_voxcpm import VoxCPMFeatureExtractor
 import scipy.io.wavfile
 import os
 # 检查是否有可用的 GPU，否则使用 CPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# 加载模型和处理器
-# 使用 cache_dir 参数将模型下载到持久化存储中
-cache_dir = "./models"
-if not os.path.exists(cache_dir):
-    os.makedirs(cache_dir)
-model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B", cache_dir=cache_dir).to(device)
-feature_extractor = VoxCPMFeatureExtractor.from_pretrained("openbmb/VoxCPM-0.5B", cache_dir=cache_dir)
 def generate_speech(text):
     """
     使用 VoxCPM 模型生成语音的函数。
     """
     if not text or text.strip() == "":
-        # 返回空的音频和一条提示信息
-        return None, "请输入有效文本"
-    # 使用模型生成语音波形
-    wav = model.generate(
-        text=text,
-        prompt_wav_path=None,
-        prompt_text=None,
-        cfg_value=2.0
-    )
-    # 获取采样率
-    sampling_rate = feature_extractor.sampling_rate
     # 将生成的波形保存为临时的 .wav 文件
     output_filename = "output.wav"
@@ -48,7 +48,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=5, label="输入文本", placeholder="在这里输入你想要转换为语音的中文或英文文本..."),
     outputs=gr.Audio(label="生成的语音"),
     title="🎙️ VoxCPM-0.5B 文本到语音转换",
-    description="这是一个使用 openbmb/VoxCPM-0.5B 模型进行文本到语音合成的演示。输入一些文本，然后点击 'Submit' 来生成语音。",
     examples=[
         ["VoxCPM 是一个创新的端到端 TTS 模型，旨在生成高度富有表现力的语音。"],
         ["今天天气真不错，我们一起去散步吧！"],

 import gradio as gr
 import torch
+from transformers import AutoProcessor, AutoModel
 import scipy.io.wavfile
 import os
 # 检查是否有可用的 GPU，否则使用 CPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# 模型 ID
+model_id = "openbmb/VoxCPM-0.5B"
+# 使用 trust_remote_code=True 加载模型和处理器
+# 这会自动处理背后所需的代码，无需我们手动添加 .py 文件
+model = AutoModel.from_pretrained(model_id, trust_remote_code=True).to(device)
+processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 def generate_speech(text):
     """
     使用 VoxCPM 模型生成语音的函数。
     """
     if not text or text.strip() == "":
+        return None
+    # 使用处理器准备输入
+    inputs = processor(text=text, return_tensors="pt").to(device)
+    # 生成语音波形
+    # 注意: .generate() 可能会返回一个包含波形的元组
+    output = model.generate(**inputs, cfg_value=2.0)
+    # 提取波形数据，它通常是输出的第一个元素
+    wav = output[0] if isinstance(output, tuple) else output
+    wav = wav.cpu().numpy()
+    # 从处理器获取采样率
+    sampling_rate = processor.sampling_rate
     # 将生成的波形保存为临时的 .wav 文件
     output_filename = "output.wav"
     inputs=gr.Textbox(lines=5, label="输入文本", placeholder="在这里输入你想要转换为语音的中文或英文文本..."),
     outputs=gr.Audio(label="生成的语音"),
     title="🎙️ VoxCPM-0.5B 文本到语音转换",
+    description="这是一个使用 openbmb/VoxCPM-0.5B 模型进行文本到语音合成的演示。",
     examples=[
         ["VoxCPM 是一个创新的端到端 TTS 模型，旨在生成高度富有表现力的语音。"],
         ["今天天气真不错，我们一起去散步吧！"],