Update the LLM config: increase tensor_parallel_size, add served_model_name and tokenizer_mode
app.py CHANGED
@@ -4,8 +4,10 @@ from vllm import LLM, SamplingParams
 llm = LLM(
     model="stepfun-ai/Step-Audio-2-mini-Think",  # change to the model you need
     trust_remote_code=True,
-    tensor_parallel_size=
+    tensor_parallel_size=4,  # with multiple GPUs, set the degree of parallelism
     # gpu_memory_utilization=0.9,  # GPU memory utilization
+    served_model_name="step-audio-2-mini-think",
+    tokenizer_mode="step_audio_2",
     max_model_len=8192,
 )
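For reference, a minimal offline-inference sketch of how the LLM object configured above could be exercised. The prompt string and SamplingParams values are illustrative assumptions, not part of this commit, and tokenizer_mode="step_audio_2" assumes a vLLM build that registers that custom tokenizer.

from vllm import LLM, SamplingParams

# Same constructor arguments as app.py after this change.
llm = LLM(
    model="stepfun-ai/Step-Audio-2-mini-Think",
    trust_remote_code=True,
    tensor_parallel_size=4,
    served_model_name="step-audio-2-mini-think",
    tokenizer_mode="step_audio_2",
    max_model_len=8192,
)

# Assumed sampling settings and prompt, for illustration only.
sampling_params = SamplingParams(temperature=0.7, top_p=0.9, max_tokens=256)
outputs = llm.generate(["Hello"], sampling_params)
for out in outputs:
    print(out.outputs[0].text)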