HFHash789 commited on
Commit
d5e7e0d
·
verified ·
1 Parent(s): a46644d

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +5 -0
  2. app.py +48 -6
README.md CHANGED
@@ -110,6 +110,11 @@ python app.py
110
  - OpenAI 兼容 API: `https://<your-space>.hf.space/v1/audio/speech`
111
  - 声音克隆 API: `https://<your-space>.hf.space/v2/audio/speech_with_prompt`
112
 
 
 
 
 
 
113
  ## ⚡ 升级到 GPU 版本 (可选)
114
 
115
  如果您的电脑配备了支持 CUDA 的 NVIDIA 显卡,并已正确安装 [NVIDIA 驱动](https://www.nvidia.com/Download/index.aspx) 和 [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive),您可以升级到 GPU 版本以获得显著的性能提升。
 
110
  - OpenAI 兼容 API: `https://<your-space>.hf.space/v1/audio/speech`
111
  - 声音克隆 API: `https://<your-space>.hf.space/v2/audio/speech_with_prompt`
112
 
113
+ 如果你是用一键脚本部署(同 `whisperx-api` 的方式),直接用 `chatterbox-api/chouxiang/deploy.py`,并在 `chatterbox-api/chouxiang/.env` 里设置:
114
+ - `DEFAULT_STEPS=200`(CPU 建议 100–300,越小越快)
115
+ - `TORCH_NUM_THREADS=2`
116
+ - `TORCH_NUM_INTEROP_THREADS=1`
117
+
118
  ## ⚡ 升级到 GPU 版本 (可选)
119
 
120
  如果您的电脑配备了支持 CUDA 的 NVIDIA 显卡,并已正确安装 [NVIDIA 驱动](https://www.nvidia.com/Download/index.aspx) 和 [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive),您可以升级到 GPU 版本以获得显著的性能提升。
app.py CHANGED
@@ -2,6 +2,7 @@ import os,time,shutil,sys
2
  #os.environ['htts_proxy']='http://127.0.0.1:10808'
3
  #os.environ['htt_proxy']='http://127.0.0.1:10808'
4
  from pathlib import Path
 
5
  import threading
6
  import warnings
7
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -22,10 +23,10 @@ threads = _env_int("THREADS", 4)
22
 
23
  ROOT_DIR=Path(os.getcwd()).as_posix()
24
  # 对于国内用户,使用Hugging Face镜像能显著提高下载速度
25
- os.environ['HF_HOME'] = ROOT_DIR + "/models"
26
- os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'true'
27
- os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = 'true'
28
- os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = "1200"
29
 
30
  import subprocess,traceback
31
  import io
@@ -157,6 +158,24 @@ model = None
157
  model_lock = threading.Lock()
158
  app = Flask(__name__)
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  def get_model():
161
  global model
162
  if model is not None:
@@ -241,8 +260,19 @@ def tts_openai_compatible():
241
  try:
242
  # 生成WAV音频
243
  tts_model = get_model()
 
 
 
 
244
  t0 = time.time()
245
- wav_tensor = tts_model.generate(text,exaggeration=exaggeration,cfg_weight=cfg_weight,language_id=lang)
 
 
 
 
 
 
 
246
  print(f"[APIv1] generate() done in {time.time()-t0:.2f}s")
247
 
248
  # 检查请求的响应格式,默认为mp3
@@ -327,8 +357,20 @@ def tts_with_prompt():
327
  # --- Stage 3: Generate TTS using the converted WAV file ---
328
  print(f" - Generating TTS with prompt: {temp_wav_path}")
329
  tts_model = get_model()
 
 
 
 
330
  t0 = time.time()
331
- wav_tensor = tts_model.generate(text, audio_prompt_path=temp_wav_path,exaggeration=exaggeration,cfg_weight=cfg_weight,language_id=lang)
 
 
 
 
 
 
 
 
332
  print(f"[APIv2] generate() done in {time.time()-t0:.2f}s")
333
 
334
  # --- Stage 4: Format and Return Response Based on Request ---
 
2
  #os.environ['htts_proxy']='http://127.0.0.1:10808'
3
  #os.environ['htt_proxy']='http://127.0.0.1:10808'
4
  from pathlib import Path
5
+ import inspect
6
  import threading
7
  import warnings
8
  warnings.filterwarnings("ignore", category=FutureWarning)
 
23
 
24
  ROOT_DIR=Path(os.getcwd()).as_posix()
25
  # 对于国内用户,使用Hugging Face镜像能显著提高下载速度
26
+ os.environ.setdefault('HF_HOME', ROOT_DIR + "/models")
27
+ os.environ.setdefault('HF_HUB_DISABLE_SYMLINKS_WARNING', 'true')
28
+ os.environ.setdefault('HF_HUB_DISABLE_PROGRESS_BARS', 'true')
29
+ os.environ.setdefault('HF_HUB_DOWNLOAD_TIMEOUT', "1200")
30
 
31
  import subprocess,traceback
32
  import io
 
158
  model_lock = threading.Lock()
159
  app = Flask(__name__)
160
 
161
def generate_tts(tts_model, text, *, steps=None, **kwargs):
    """Call ``tts_model.generate(text, ...)`` with only the kwargs it accepts.

    Different TTS model versions expose different ``generate`` signatures, so
    the signature is introspected and the call is adapted:

    - ``steps`` (optional int) is forwarded under whichever step-count
      parameter name the model supports (``steps``, ``num_steps``, ...), or
      dropped if none matches.
    - Keyword arguments the model does not declare are silently dropped —
      UNLESS ``generate`` itself takes ``**kwargs``, in which case everything
      is passed through untouched.

    Returns whatever ``tts_model.generate`` returns (a waveform tensor for
    the models used in this app).
    """
    try:
        sig = inspect.signature(tts_model.generate)
    except (TypeError, ValueError):
        # C-implemented callables may not expose a signature; in that case we
        # cannot map ``steps`` or filter, so pass kwargs through unchanged.
        sig = None

    if sig is not None:
        params = sig.parameters
        # True when generate() declares a **kwargs catch-all.
        accepts_var_kw = any(
            p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
        )

        if steps is not None:
            # Map the generic "steps" request onto the model's own spelling.
            for name in ("steps", "num_steps", "n_steps", "sampling_steps", "num_inference_steps"):
                if name in params:
                    kwargs[name] = int(steps)
                    break

        # BUGFIX: the previous version filtered by explicit parameter name
        # even when generate() accepted **kwargs, which dropped every valid
        # option (exaggeration, cfg_weight, ...). Only filter when there is
        # no VAR_KEYWORD catch-all.
        if not accepts_var_kw:
            kwargs = {k: v for k, v in kwargs.items() if k in params}

    return tts_model.generate(text, **kwargs)
178
+
179
  def get_model():
180
  global model
181
  if model is not None:
 
260
  try:
261
  # 生成WAV音频
262
  tts_model = get_model()
263
+ steps = data.get("steps", data.get("num_steps", None))
264
+ if steps is None:
265
+ steps = _env_int("DEFAULT_STEPS", 200 if (not torch.cuda.is_available()) else 1000)
266
+ print(f"[APIv1] steps={steps}")
267
  t0 = time.time()
268
+ wav_tensor = generate_tts(
269
+ tts_model,
270
+ text,
271
+ exaggeration=exaggeration,
272
+ cfg_weight=cfg_weight,
273
+ language_id=lang,
274
+ steps=steps,
275
+ )
276
  print(f"[APIv1] generate() done in {time.time()-t0:.2f}s")
277
 
278
  # 检查请求的响应格式,默认为mp3
 
357
  # --- Stage 3: Generate TTS using the converted WAV file ---
358
  print(f" - Generating TTS with prompt: {temp_wav_path}")
359
  tts_model = get_model()
360
+ steps = request.form.get("steps") or request.form.get("num_steps")
361
+ if steps is None or str(steps).strip() == "":
362
+ steps = _env_int("DEFAULT_STEPS", 200 if (not torch.cuda.is_available()) else 1000)
363
+ print(f"[APIv2] steps={steps}")
364
  t0 = time.time()
365
+ wav_tensor = generate_tts(
366
+ tts_model,
367
+ text,
368
+ audio_prompt_path=temp_wav_path,
369
+ exaggeration=exaggeration,
370
+ cfg_weight=cfg_weight,
371
+ language_id=lang,
372
+ steps=steps,
373
+ )
374
  print(f"[APIv2] generate() done in {time.time()-t0:.2f}s")
375
 
376
  # --- Stage 4: Format and Return Response Based on Request ---