Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
README.md
CHANGED
|
@@ -110,6 +110,11 @@ python app.py
|
|
| 110 |
- OpenAI 兼容 API: `https://<your-space>.hf.space/v1/audio/speech`
|
| 111 |
- 声音克隆 API: `https://<your-space>.hf.space/v2/audio/speech_with_prompt`
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
## ⚡ 升级到 GPU 版本 (可选)
|
| 114 |
|
| 115 |
如果您的电脑配备了支持 CUDA 的 NVIDIA 显卡,并已正确安装 [NVIDIA 驱动](https://www.nvidia.com/Download/index.aspx) 和 [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive),您可以升级到 GPU 版本以获得显著的性能提升。
|
|
|
|
| 110 |
- OpenAI 兼容 API: `https://<your-space>.hf.space/v1/audio/speech`
|
| 111 |
- 声音克隆 API: `https://<your-space>.hf.space/v2/audio/speech_with_prompt`
|
| 112 |
|
| 113 |
+
如果您使用一键脚本部署(与 `whisperx-api` 的方式相同),请直接使用 `chatterbox-api/chouxiang/deploy.py`,并在 `chatterbox-api/chouxiang/.env` 中设置:
|
| 114 |
+
- `DEFAULT_STEPS=200`(CPU 建议 100–300,越小越快)
|
| 115 |
+
- `TORCH_NUM_THREADS=2`
|
| 116 |
+
- `TORCH_NUM_INTEROP_THREADS=1`
|
| 117 |
+
|
| 118 |
## ⚡ 升级到 GPU 版本 (可选)
|
| 119 |
|
| 120 |
如果您的电脑配备了支持 CUDA 的 NVIDIA 显卡,并已正确安装 [NVIDIA 驱动](https://www.nvidia.com/Download/index.aspx) 和 [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive),您可以升级到 GPU 版本以获得显著的性能提升。
|
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import os,time,shutil,sys
|
|
| 2 |
#os.environ['https_proxy']='http://127.0.0.1:10808'
|
| 3 |
#os.environ['http_proxy']='http://127.0.0.1:10808'
|
| 4 |
from pathlib import Path
|
|
|
|
| 5 |
import threading
|
| 6 |
import warnings
|
| 7 |
warnings.filterwarnings("ignore", category=FutureWarning)
|
|
@@ -22,10 +23,10 @@ threads = _env_int("THREADS", 4)
|
|
| 22 |
|
| 23 |
ROOT_DIR=Path(os.getcwd()).as_posix()
|
| 24 |
# 对于国内用户,使用Hugging Face镜像能显著提高下载速度
|
| 25 |
-
os.environ
|
| 26 |
-
os.environ
|
| 27 |
-
os.environ
|
| 28 |
-
os.environ
|
| 29 |
|
| 30 |
import subprocess,traceback
|
| 31 |
import io
|
|
@@ -157,6 +158,24 @@ model = None
|
|
| 157 |
model_lock = threading.Lock()
|
| 158 |
app = Flask(__name__)
|
| 159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
def get_model():
|
| 161 |
global model
|
| 162 |
if model is not None:
|
|
@@ -241,8 +260,19 @@ def tts_openai_compatible():
|
|
| 241 |
try:
|
| 242 |
# 生成WAV音频
|
| 243 |
tts_model = get_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
t0 = time.time()
|
| 245 |
-
wav_tensor =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
print(f"[APIv1] generate() done in {time.time()-t0:.2f}s")
|
| 247 |
|
| 248 |
# 检查请求的响应格式,默认为mp3
|
|
@@ -327,8 +357,20 @@ def tts_with_prompt():
|
|
| 327 |
# --- Stage 3: Generate TTS using the converted WAV file ---
|
| 328 |
print(f" - Generating TTS with prompt: {temp_wav_path}")
|
| 329 |
tts_model = get_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
t0 = time.time()
|
| 331 |
-
wav_tensor =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
print(f"[APIv2] generate() done in {time.time()-t0:.2f}s")
|
| 333 |
|
| 334 |
# --- Stage 4: Format and Return Response Based on Request ---
|
|
|
|
| 2 |
#os.environ['https_proxy']='http://127.0.0.1:10808'
|
| 3 |
#os.environ['http_proxy']='http://127.0.0.1:10808'
|
| 4 |
from pathlib import Path
|
| 5 |
+
import inspect
|
| 6 |
import threading
|
| 7 |
import warnings
|
| 8 |
warnings.filterwarnings("ignore", category=FutureWarning)
|
|
|
|
| 23 |
|
| 24 |
ROOT_DIR=Path(os.getcwd()).as_posix()
|
| 25 |
# 对于国内用户,使用Hugging Face镜像能显著提高下载速度
|
| 26 |
+
os.environ.setdefault('HF_HOME', ROOT_DIR + "/models")
|
| 27 |
+
os.environ.setdefault('HF_HUB_DISABLE_SYMLINKS_WARNING', 'true')
|
| 28 |
+
os.environ.setdefault('HF_HUB_DISABLE_PROGRESS_BARS', 'true')
|
| 29 |
+
os.environ.setdefault('HF_HUB_DOWNLOAD_TIMEOUT', "1200")
|
| 30 |
|
| 31 |
import subprocess,traceback
|
| 32 |
import io
|
|
|
|
| 158 |
model_lock = threading.Lock()
|
| 159 |
app = Flask(__name__)
|
| 160 |
|
| 161 |
+
def generate_tts(tts_model, text, *, steps=None, **kwargs):
    """Call ``tts_model.generate`` with only the keyword arguments it accepts.

    The signature of ``tts_model.generate`` is inspected so that options it
    does not declare are dropped instead of raising ``TypeError``.

    Args:
        tts_model: Object exposing a ``generate(text, ...)`` method.
        text: Text passed as the first positional argument to ``generate``.
        steps: Optional step count. It is converted with ``int()`` and passed
            under the first of ``steps``, ``num_steps``, ``n_steps``,
            ``sampling_steps`` or ``num_inference_steps`` that ``generate``
            declares. If none is declared it is not forwarded.
        **kwargs: Extra options for ``generate``. Options that ``generate``
            cannot receive by keyword are discarded, unless ``generate``
            itself declares ``**kwargs``, in which case all are forwarded.

    Returns:
        Whatever ``tts_model.generate`` returns.

    Raises:
        ValueError: If ``steps`` is applicable but cannot be converted to int.
    """
    try:
        params = inspect.signature(tts_model.generate).parameters
    except Exception:
        # Best effort: if the callable cannot be introspected, forward the
        # caller's kwargs untouched and leave `steps` out.
        params = None

    if params is None:
        return tts_model.generate(text, **kwargs)

    keyword_kinds = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    # Names that can really be passed by keyword (excludes *args / **kwargs
    # placeholders and positional-only parameters).
    named = {name for name, p in params.items() if p.kind in keyword_kinds}
    accepts_any_keyword = any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
    )

    if steps is not None:
        step_aliases = (
            "steps",
            "num_steps",
            "n_steps",
            "sampling_steps",
            "num_inference_steps",
        )
        for alias in step_aliases:
            if alias in named:
                kwargs[alias] = int(steps)
                break

    # Filtering by declared names is only correct when generate() has no
    # **kwargs of its own; otherwise every caller option would be dropped.
    if not accepts_any_keyword:
        kwargs = {k: v for k, v in kwargs.items() if k in named}

    return tts_model.generate(text, **kwargs)
|
| 178 |
+
|
| 179 |
def get_model():
|
| 180 |
global model
|
| 181 |
if model is not None:
|
|
|
|
| 260 |
try:
|
| 261 |
# 生成WAV音频
|
| 262 |
tts_model = get_model()
|
| 263 |
+
steps = data.get("steps", data.get("num_steps", None))
|
| 264 |
+
if steps is None:
|
| 265 |
+
steps = _env_int("DEFAULT_STEPS", 200 if (not torch.cuda.is_available()) else 1000)
|
| 266 |
+
print(f"[APIv1] steps={steps}")
|
| 267 |
t0 = time.time()
|
| 268 |
+
wav_tensor = generate_tts(
|
| 269 |
+
tts_model,
|
| 270 |
+
text,
|
| 271 |
+
exaggeration=exaggeration,
|
| 272 |
+
cfg_weight=cfg_weight,
|
| 273 |
+
language_id=lang,
|
| 274 |
+
steps=steps,
|
| 275 |
+
)
|
| 276 |
print(f"[APIv1] generate() done in {time.time()-t0:.2f}s")
|
| 277 |
|
| 278 |
# 检查请求的响应格式,默认为mp3
|
|
|
|
| 357 |
# --- Stage 3: Generate TTS using the converted WAV file ---
|
| 358 |
print(f" - Generating TTS with prompt: {temp_wav_path}")
|
| 359 |
tts_model = get_model()
|
| 360 |
+
steps = request.form.get("steps") or request.form.get("num_steps")
|
| 361 |
+
if steps is None or str(steps).strip() == "":
|
| 362 |
+
steps = _env_int("DEFAULT_STEPS", 200 if (not torch.cuda.is_available()) else 1000)
|
| 363 |
+
print(f"[APIv2] steps={steps}")
|
| 364 |
t0 = time.time()
|
| 365 |
+
wav_tensor = generate_tts(
|
| 366 |
+
tts_model,
|
| 367 |
+
text,
|
| 368 |
+
audio_prompt_path=temp_wav_path,
|
| 369 |
+
exaggeration=exaggeration,
|
| 370 |
+
cfg_weight=cfg_weight,
|
| 371 |
+
language_id=lang,
|
| 372 |
+
steps=steps,
|
| 373 |
+
)
|
| 374 |
print(f"[APIv2] generate() done in {time.time()-t0:.2f}s")
|
| 375 |
|
| 376 |
# --- Stage 4: Format and Return Response Based on Request ---
|