#!/bin/bash
# Startup script: optionally download the model, then launch the Gradio app
# directly (it calls vLLM.LLM internally)
set -euo pipefail
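# Strict mode: exit on any error (-e), treat unset variables as errors (-u),
# and make a pipeline fail if any command in it fails (pipefail)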
MODEL_REPO="${MODEL_REPO:-stepfun-ai/Step-Audio-2-mini-Think}"
MODEL_DIR="${MODEL_DIR:-/tmp/app/models/Step-Audio-2-mini-Think}"
PRELOAD_MODEL="${PRELOAD_MODEL:-1}"
GRADIO_PORT=${GRADIO_PORT:-7860}
HOST=${HOST:-0.0.0.0}
TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-4}
MAX_MODEL_LEN=${MAX_MODEL_LEN:-8192}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.9}
TOKENIZER_MODE=${TOKENIZER_MODE:-step_audio_2}
SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-step-audio-2-mini-think}
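# All of the settings above can be overridden from the environment, e.g.:
#   TENSOR_PARALLEL_SIZE=1 GPU_MEMORY_UTILIZATION=0.8 ./start_gradio.sh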
# Point the cache directories at a writable location
export HF_HOME="${HF_HOME:-/tmp/hf_cache}"
export XDG_CACHE_HOME="${XDG_CACHE_HOME:-/tmp/hf_cache}"
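# huggingface_hub honors HF_HOME, and most tools fall back to XDG_CACHE_HOME,
# so downloads land under /tmp even when $HOME is read-only (as is common in
# containerized deployments)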
echo "=========================================="
echo "Step Audio 2 Gradio 启动脚本"
echo "MODEL_REPO: $MODEL_REPO"
echo "MODEL_DIR : $MODEL_DIR"
echo "PRELOAD_MODEL: $PRELOAD_MODEL"
echo "HOST/PORT: $HOST:$GRADIO_PORT"
echo "TP: $TENSOR_PARALLEL_SIZE | MAX_LEN: $MAX_MODEL_LEN"
echo "缓存目录: $HF_HOME"
echo "=========================================="
download_model() {
  # Create the required directories
  mkdir -p "$MODEL_DIR"
  mkdir -p "$HF_HOME"
  echo "[Download] Downloading model to: $MODEL_DIR"
  echo "[Download] Cache dir: $HF_HOME"
  # Prefer the hf command (recommended by newer huggingface_hub releases)
  if command -v hf &> /dev/null; then
    echo "[Download] Using the hf download command"
    hf download "$MODEL_REPO" --local-dir "$MODEL_DIR" --cache-dir "$HF_HOME"
  elif command -v huggingface-cli &> /dev/null; then
    echo "[Download] Using huggingface-cli"
    huggingface-cli download "$MODEL_REPO" --local-dir "$MODEL_DIR" --local-dir-use-symlinks False
  else
    echo "[Download] Error: neither 'hf' nor 'huggingface-cli' is available" >&2
    return 1
  fi
}
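# If neither CLI is on PATH, a Python fallback along these lines could be used
# instead (a sketch, assuming the huggingface_hub package is installed):
#   python3 -c "from huggingface_hub import snapshot_download; \
#     snapshot_download('$MODEL_REPO', local_dir='$MODEL_DIR')"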
if [[ "$PRELOAD_MODEL" == "1" ]]; then
# 检查模型是否完整(检查关键文件)
if [[ ! -d "$MODEL_DIR" ]] || [[ ! -f "$MODEL_DIR/config.json" ]] || [[ ! -f "$MODEL_DIR/model.safetensors.index.json" ]]; then
echo "模型未就绪或文件不完整,开始下载..."
download_model
else
echo "检测到本地模型: $MODEL_DIR"
echo "模型文件检查通过"
fi
export MODEL_PATH="$MODEL_DIR"
else
echo "跳过预下载,直接使用仓库名称加载"
export MODEL_PATH="${MODEL_PATH:-$MODEL_REPO}"
fi
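# Note: with PRELOAD_MODEL=0, vLLM resolves the repository name itself at load
# time and downloads the weights into $HF_HOME on first use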
# Verify that the model files are in place
if [[ "$PRELOAD_MODEL" == "1" ]]; then
  echo "=== Model file verification ==="
  ls -la "$MODEL_DIR" | head -10
  if [[ -f "$MODEL_DIR/config.json" ]]; then
    echo "✓ config.json present"
  else
    echo "✗ config.json missing"
  fi
  if [[ -f "$MODEL_DIR/model.safetensors.index.json" ]]; then
    echo "✓ model.safetensors.index.json present"
  else
    echo "✗ model.safetensors.index.json missing"
  fi
  echo "==================="
fi
echo "模型路径: ${MODEL_PATH}"
echo "启动 Gradio..."
PYTHON_BIN="${PYTHON_BIN:-python3}"
if ! command -v "$PYTHON_BIN" >/dev/null 2>&1; then
  echo "Python interpreter not found (current setting: $PYTHON_BIN); make sure python3 is installed in the image." >&2
  exit 1
fi
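# The flags below mirror common vLLM engine arguments; app.py is assumed to
# forward them to the in-process vLLM engine (see the header comment above)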
"$PYTHON_BIN" app.py \
--host "$HOST" \
--port "$GRADIO_PORT" \
--model "$MODEL_PATH" \
--tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
--max-model-len "$MAX_MODEL_LEN" \
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
--tokenizer-mode "$TOKENIZER_MODE" \
--served-model-name "$SERVED_MODEL_NAME"