Spaces:

HFHash789
/

chatterbox-api

Running

App Files Community

HFHash789 committed 10 days ago

Commit

d5e7e0d

verified ·

1 Parent(s): a46644d

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

README.md +5 -0
app.py +48 -6

README.md CHANGED Viewed

@@ -110,6 +110,11 @@ python app.py
    - OpenAI 兼容 API: `https://<your-space>.hf.space/v1/audio/speech`
    - 声音克隆 API: `https://<your-space>.hf.space/v2/audio/speech_with_prompt`
 ## ⚡ 升级到 GPU 版本 (可选)
 如果您的电脑配备了支持 CUDA 的 NVIDIA 显卡，并已正确安装 [NVIDIA 驱动](https://www.nvidia.com/Download/index.aspx) 和 [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive)，您可以升级到 GPU 版本以获得显著的性能提升。

    - OpenAI 兼容 API: `https://<your-space>.hf.space/v1/audio/speech`
    - 声音克隆 API: `https://<your-space>.hf.space/v2/audio/speech_with_prompt`
+如果您使用一键脚本部署（与 `whisperx-api` 的部署方式相同），请直接运行 `chatterbox-api/chouxiang/deploy.py`，并在 `chatterbox-api/chouxiang/.env` 中设置：
+- `DEFAULT_STEPS=200`（CPU 建议 100–300，越小越快）
+- `TORCH_NUM_THREADS=2`
+- `TORCH_NUM_INTEROP_THREADS=1`
 ## ⚡ 升级到 GPU 版本 (可选)
 如果您的电脑配备了支持 CUDA 的 NVIDIA 显卡，并已正确安装 [NVIDIA 驱动](https://www.nvidia.com/Download/index.aspx) 和 [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive)，您可以升级到 GPU 版本以获得显著的性能提升。

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os,time,shutil,sys
 #os.environ['https_proxy']='http://127.0.0.1:10808'
 #os.environ['http_proxy']='http://127.0.0.1:10808'
 from pathlib import Path
 import threading
 import warnings
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -22,10 +23,10 @@ threads = _env_int("THREADS", 4)
 ROOT_DIR=Path(os.getcwd()).as_posix()
 # 对于国内用户，使用Hugging Face镜像能显著提高下载速度
-os.environ['HF_HOME'] = ROOT_DIR + "/models"
-os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'true'
-os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = 'true'
-os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = "1200"
 import subprocess,traceback
 import io
@@ -157,6 +158,24 @@ model = None
 model_lock = threading.Lock()
 app = Flask(__name__)
 def get_model():
     global model
     if model is not None:
@@ -241,8 +260,19 @@ def tts_openai_compatible():
     try:
         # 生成WAV音频
         tts_model = get_model()
         t0 = time.time()
-        wav_tensor = tts_model.generate(text,exaggeration=exaggeration,cfg_weight=cfg_weight,language_id=lang)
         print(f"[APIv1] generate() done in {time.time()-t0:.2f}s")
         # 检查请求的响应格式，默认为mp3
@@ -327,8 +357,20 @@ def tts_with_prompt():
         # --- Stage 3: Generate TTS using the converted WAV file ---
         print(f"   - Generating TTS with prompt: {temp_wav_path}")
         tts_model = get_model()
         t0 = time.time()
-        wav_tensor = tts_model.generate(text, audio_prompt_path=temp_wav_path,exaggeration=exaggeration,cfg_weight=cfg_weight,language_id=lang)
         print(f"[APIv2] generate() done in {time.time()-t0:.2f}s")
         # --- Stage 4: Format and Return Response Based on Request ---

 #os.environ['https_proxy']='http://127.0.0.1:10808'
 #os.environ['http_proxy']='http://127.0.0.1:10808'
 from pathlib import Path
+import inspect
 import threading
 import warnings
 warnings.filterwarnings("ignore", category=FutureWarning)
 ROOT_DIR=Path(os.getcwd()).as_posix()
 # 对于国内用户，使用Hugging Face镜像能显著提高下载速度
+os.environ.setdefault('HF_HOME', ROOT_DIR + "/models")
+os.environ.setdefault('HF_HUB_DISABLE_SYMLINKS_WARNING', 'true')
+os.environ.setdefault('HF_HUB_DISABLE_PROGRESS_BARS', 'true')
+os.environ.setdefault('HF_HUB_DOWNLOAD_TIMEOUT', "1200")
 import subprocess,traceback
 import io
 model_lock = threading.Lock()
 app = Flask(__name__)
def generate_tts(tts_model, text, *, steps=None, **kwargs):
    """Call ``tts_model.generate`` with only the keyword arguments it accepts.

    The signature of ``tts_model.generate`` is inspected so that the same call
    site works with model variants whose ``generate`` takes different options.

    Args:
        tts_model: Object exposing a ``generate(text, ...)`` method.
        text: Text to synthesize; passed as the first positional argument.
        steps: Optional step count. When given, it is converted with ``int()``
            and forwarded under the first of ``steps``, ``num_steps``,
            ``n_steps``, ``sampling_steps`` or ``num_inference_steps`` that
            ``generate`` declares as a parameter. If none is declared (or the
            signature cannot be inspected) the value is not forwarded.
        **kwargs: Extra options for ``generate``. Options that ``generate``
            does not declare are dropped, unless ``generate`` itself takes a
            ``**kwargs`` catch-all, in which case all of them are forwarded.

    Returns:
        Whatever ``tts_model.generate`` returns.

    Raises:
        ValueError, TypeError: If ``steps`` cannot be converted by ``int()``.
    """
    try:
        sig = inspect.signature(tts_model.generate)
    except (TypeError, ValueError):
        # inspect.signature raises these for callables it cannot introspect
        # (e.g. some builtins); fall back to passing kwargs through untouched.
        sig = None

    if sig is None:
        return tts_model.generate(text, **kwargs)

    params = sig.parameters

    if steps is not None:
        # Different model versions may name the step-count option differently;
        # use the first alias the target actually declares.
        for name in ("steps", "num_steps", "n_steps", "sampling_steps", "num_inference_steps"):
            if name in params:
                kwargs[name] = int(steps)
                break

    # A ``**kwargs`` parameter on the target means it accepts arbitrary keyword
    # options, so filtering by declared names would wrongly discard them.
    accepts_any_keyword = any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
    )
    if not accepts_any_keyword:
        kwargs = {k: v for k, v in kwargs.items() if k in params}

    return tts_model.generate(text, **kwargs)
 def get_model():
     global model
     if model is not None:
     try:
         # 生成WAV音频
         tts_model = get_model()
+        steps = data.get("steps", data.get("num_steps", None))
+        if steps is None:
+            steps = _env_int("DEFAULT_STEPS", 200 if (not torch.cuda.is_available()) else 1000)
+        print(f"[APIv1] steps={steps}")
         t0 = time.time()
+        wav_tensor = generate_tts(
+            tts_model,
+            text,
+            exaggeration=exaggeration,
+            cfg_weight=cfg_weight,
+            language_id=lang,
+            steps=steps,
+        )
         print(f"[APIv1] generate() done in {time.time()-t0:.2f}s")
         # 检查请求的响应格式，默认为mp3
         # --- Stage 3: Generate TTS using the converted WAV file ---
         print(f"   - Generating TTS with prompt: {temp_wav_path}")
         tts_model = get_model()
+        steps = request.form.get("steps") or request.form.get("num_steps")
+        if steps is None or str(steps).strip() == "":
+            steps = _env_int("DEFAULT_STEPS", 200 if (not torch.cuda.is_available()) else 1000)
+        print(f"[APIv2] steps={steps}")
         t0 = time.time()
+        wav_tensor = generate_tts(
+            tts_model,
+            text,
+            audio_prompt_path=temp_wav_path,
+            exaggeration=exaggeration,
+            cfg_weight=cfg_weight,
+            language_id=lang,
+            steps=steps,
+        )
         print(f"[APIv2] generate() done in {time.time()-t0:.2f}s")
         # --- Stage 4: Format and Return Response Based on Request ---