Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
e36805d
1
Parent(s):
d3bb466
Fix NeMo CUDA error 35 on ZeroGPU by disabling CUDA graphs in decoding config
Browse files
The previous fix disabled CUDA graphs on the decoding_computer object, but
transcribe(timestamps=True) calls change_decoding_strategy() which rebuilds
the decoder from cfg, re-enabling CUDA graphs. Setting
cfg.decoding.greedy.use_cuda_graph_decoder=False persists across rebuilds.
This commit also loads the English ASR model eagerly at startup, at global
scope, where ZeroGPU can properly hijack CUDA calls, instead of lazy-loading
it inside a @spaces.GPU-decorated function.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
preprocess/tools/lyric_transcription.py
CHANGED
|
@@ -150,12 +150,15 @@ class _ASREnModel:
|
|
| 150 |
map_location=device,
|
| 151 |
)
|
| 152 |
self.model.eval()
|
| 153 |
-
# Disable CUDA Graphs
|
| 154 |
-
#
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _clean_word(word: str) -> str:
|
|
@@ -219,9 +222,6 @@ class LyricTranscriber:
|
|
| 219 |
verbose (bool): Whether to print verbose logs.
|
| 220 |
"""
|
| 221 |
self.verbose = verbose
|
| 222 |
-
self.device = device
|
| 223 |
-
self.zh_model_path = zh_model_path
|
| 224 |
-
self.en_model_path = en_model_path
|
| 225 |
|
| 226 |
if self.verbose:
|
| 227 |
print(
|
|
@@ -233,8 +233,9 @@ class LyricTranscriber:
|
|
| 233 |
# Always initialize Chinese ASR.
|
| 234 |
self.zh_model = _ASRZhModel(device=device, model_path=zh_model_path)
|
| 235 |
|
| 236 |
-
# English ASR
|
| 237 |
-
|
|
|
|
| 238 |
|
| 239 |
if self.verbose:
|
| 240 |
print("[lyric transcription] init: success")
|
|
@@ -256,11 +257,6 @@ class LyricTranscriber:
|
|
| 256 |
|
| 257 |
lang = (language or "auto").lower()
|
| 258 |
if lang in {"english"}:
|
| 259 |
-
if self.en_model is None:
|
| 260 |
-
# Lazy-load NeMo model only when English is actually used.
|
| 261 |
-
if v:
|
| 262 |
-
print("[lyric transcription] init English ASR, please make sure NeMo is installed")
|
| 263 |
-
self.en_model = _ASREnModel(model_path=self.en_model_path, device=self.device)
|
| 264 |
out = self.en_model.process(wav_fn)
|
| 265 |
else:
|
| 266 |
out = self.zh_model.process(wav_fn)
|
|
|
|
| 150 |
map_location=device,
|
| 151 |
)
|
| 152 |
self.model.eval()
|
| 153 |
+
# Disable CUDA Graphs via the decoding config to avoid
|
| 154 |
+
# "CUDA failure! 35" (cudaErrorInsufficientDriver) on
|
| 155 |
+
# CUDA 12.8 + ZeroGPU where the driver is too old for graph capture.
|
| 156 |
+
# This must be set in the config (not on the decoding_computer) because
|
| 157 |
+
# transcribe(timestamps=True) calls change_decoding_strategy() which
|
| 158 |
+
# rebuilds the decoder from cfg.
|
| 159 |
+
from omegaconf import open_dict
|
| 160 |
+
with open_dict(self.model.cfg.decoding):
|
| 161 |
+
self.model.cfg.decoding.greedy.use_cuda_graph_decoder = False
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _clean_word(word: str) -> str:
|
|
|
|
| 222 |
verbose (bool): Whether to print verbose logs.
|
| 223 |
"""
|
| 224 |
self.verbose = verbose
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
if self.verbose:
|
| 227 |
print(
|
|
|
|
| 233 |
# Always initialize Chinese ASR.
|
| 234 |
self.zh_model = _ASRZhModel(device=device, model_path=zh_model_path)
|
| 235 |
|
| 236 |
+
# Initialize English ASR eagerly so the model is loaded at global
|
| 237 |
+
# scope where ZeroGPU can hijack CUDA calls properly.
|
| 238 |
+
self.en_model = _ASREnModel(model_path=en_model_path, device=device)
|
| 239 |
|
| 240 |
if self.verbose:
|
| 241 |
print("[lyric transcription] init: success")
|
|
|
|
| 257 |
|
| 258 |
lang = (language or "auto").lower()
|
| 259 |
if lang in {"english"}:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
out = self.en_model.process(wav_fn)
|
| 261 |
else:
|
| 262 |
out = self.zh_model.process(wav_fn)
|