刘鑫 committed on
Commit
f6f653d
·
1 Parent(s): 1700cda

set zero gpu inference

Browse files
Files changed (1) hide show
  1. app.py +25 -20
app.py CHANGED
@@ -8,6 +8,13 @@ from pathlib import Path
8
  import tempfile
9
  import soundfile as sf
10
  import time
 
 
 
 
 
 
 
11
 
12
 
13
  def setup_cache_env():
@@ -109,9 +116,8 @@ def get_asr_model():
109
  global _asr_model
110
  if _asr_model is None:
111
  from funasr import AutoModel
112
- print("=" * 50)
113
- print("Loading ASR model...")
114
- print(f" Using local path: {ASR_LOCAL_DIR}")
115
  start_time = time.time()
116
  _asr_model = AutoModel(
117
  model=ASR_LOCAL_DIR, # Use local directory path
@@ -120,8 +126,8 @@ def get_asr_model():
120
  device="cuda:0",
121
  )
122
  load_time = time.time() - start_time
123
- print(f"ASR model loaded. (耗时: {load_time:.2f}s)")
124
- print("=" * 50)
125
  return _asr_model
126
 
127
 
@@ -130,9 +136,8 @@ def get_voxcpm_model():
130
  global _voxcpm_model
131
  if _voxcpm_model is None:
132
  import voxcpm
133
- print("=" * 50)
134
- print("Loading VoxCPM model...")
135
- print(f" Using local path: {VOXCPM_LOCAL_DIR}")
136
  start_time = time.time()
137
  _voxcpm_model = voxcpm.VoxCPM(
138
  voxcpm_model_path=VOXCPM_LOCAL_DIR,
@@ -140,8 +145,8 @@ def get_voxcpm_model():
140
  enable_denoiser=False, # Disable denoiser to avoid ZipEnhancer download
141
  )
142
  load_time = time.time() - start_time
143
- print(f"VoxCPM model loaded. (耗时: {load_time:.2f}s)")
144
- print("=" * 50)
145
  return _voxcpm_model
146
 
147
 
@@ -150,16 +155,16 @@ def prompt_wav_recognition(prompt_wav: Optional[str]) -> str:
150
  """Use ASR to recognize prompt audio text."""
151
  if prompt_wav is None or not prompt_wav.strip():
152
  return ""
153
- print("=" * 50)
154
- print("[ASR] 开始语音识别...")
155
  asr_model = get_asr_model()
156
  start_time = time.time()
157
  res = asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
158
  inference_time = time.time() - start_time
159
  text = res[0]["text"].split('|>')[-1]
160
- print(f"[ASR] 识别结果: {text}")
161
- print(f"[ASR] 推理耗时: {inference_time:.2f}s")
162
- print("=" * 50)
163
  return text
164
 
165
 
@@ -193,9 +198,9 @@ def generate_tts_audio_gpu(
193
  prompt_wav_path = f.name
194
 
195
  try:
196
- print("=" * 50)
197
- print("[TTS] 开始语音合成...")
198
- print(f"[TTS] 目标文本: {text}")
199
  start_time = time.time()
200
  wav = voxcpm_model.generate(
201
  text=text,
@@ -209,8 +214,8 @@ def generate_tts_audio_gpu(
209
  inference_time = time.time() - start_time
210
  audio_duration = len(wav) / voxcpm_model.tts_model.sample_rate
211
  rtf = inference_time / audio_duration if audio_duration > 0 else 0
212
- print(f"[TTS] 推理耗时: {inference_time:.2f}s | 音频时长: {audio_duration:.2f}s | RTF: {rtf:.3f}")
213
- print("=" * 50)
214
  return (voxcpm_model.tts_model.sample_rate, wav)
215
  finally:
216
  # Cleanup temp file
 
8
  import tempfile
9
  import soundfile as sf
10
  import time
11
+ from datetime import datetime
12
+
13
+
14
+ def log(msg: str):
15
+ """打印带时间戳的日志"""
16
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
17
+ print(f"[{timestamp}] {msg}")
18
 
19
 
20
  def setup_cache_env():
 
116
  global _asr_model
117
  if _asr_model is None:
118
  from funasr import AutoModel
119
+ log("=" * 50)
120
+ log(f"Loading ASR model from: {ASR_LOCAL_DIR}")
 
121
  start_time = time.time()
122
  _asr_model = AutoModel(
123
  model=ASR_LOCAL_DIR, # Use local directory path
 
126
  device="cuda:0",
127
  )
128
  load_time = time.time() - start_time
129
+ log(f"ASR model loaded. (耗时: {load_time:.2f}s)")
130
+ log("=" * 50)
131
  return _asr_model
132
 
133
 
 
136
  global _voxcpm_model
137
  if _voxcpm_model is None:
138
  import voxcpm
139
+ log("=" * 50)
140
+ log(f"Loading VoxCPM model from: {VOXCPM_LOCAL_DIR}")
 
141
  start_time = time.time()
142
  _voxcpm_model = voxcpm.VoxCPM(
143
  voxcpm_model_path=VOXCPM_LOCAL_DIR,
 
145
  enable_denoiser=False, # Disable denoiser to avoid ZipEnhancer download
146
  )
147
  load_time = time.time() - start_time
148
+ log(f"VoxCPM model loaded. (耗时: {load_time:.2f}s)")
149
+ log("=" * 50)
150
  return _voxcpm_model
151
 
152
 
 
155
  """Use ASR to recognize prompt audio text."""
156
  if prompt_wav is None or not prompt_wav.strip():
157
  return ""
158
+ log("=" * 50)
159
+ log("[ASR] 开始语音识别...")
160
  asr_model = get_asr_model()
161
  start_time = time.time()
162
  res = asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
163
  inference_time = time.time() - start_time
164
  text = res[0]["text"].split('|>')[-1]
165
+ log(f"[ASR] 识别结果: {text}")
166
+ log(f"[ASR] 推理耗时: {inference_time:.2f}s")
167
+ log("=" * 50)
168
  return text
169
 
170
 
 
198
  prompt_wav_path = f.name
199
 
200
  try:
201
+ log("=" * 50)
202
+ log("[TTS] 开始语音合成...")
203
+ log(f"[TTS] 目标文本: {text}")
204
  start_time = time.time()
205
  wav = voxcpm_model.generate(
206
  text=text,
 
214
  inference_time = time.time() - start_time
215
  audio_duration = len(wav) / voxcpm_model.tts_model.sample_rate
216
  rtf = inference_time / audio_duration if audio_duration > 0 else 0
217
+ log(f"[TTS] 推理耗时: {inference_time:.2f}s | 音频时长: {audio_duration:.2f}s | RTF: {rtf:.3f}")
218
+ log("=" * 50)
219
  return (voxcpm_model.tts_model.sample_rate, wav)
220
  finally:
221
  # Cleanup temp file