# Launch the SGLang server (python3 -m sglang.launch_server).
#
# Environment read by this section:
#   MODEL_ID        - value passed to --model-path (required)
#   KV_CACHE_DTYPE  - value passed to --kv-cache-dtype (required)
#   TP_SIZE         - value passed to both --tensor-parallel-size and
#                     --expert-parallel-size (required)
#   QUANT_METHOD    - optional; when non-empty it is passed to --quantization,
#                     when empty or unset no --quantization flag is given
#
# The exit status of this section is the exit status of the server process.

# Fail fast with a clear message: an empty value here would otherwise make the
# flag consume the following flag as its argument.
: "${MODEL_ID:?MODEL_ID must be set}"
: "${KV_CACHE_DTYPE:?KV_CACHE_DTYPE must be set}"
: "${TP_SIZE:?TP_SIZE must be set}"

if [ -z "${QUANT_METHOD:-}" ]; then
  echo "Using native precision"
else
  echo "Using ${QUANT_METHOD} quantization schema"
fi

# Single invocation for both modes. The ${QUANT_METHOD:+...} expansion is left
# unquoted on purpose: it yields the two words `--quantization <value>` when
# QUANT_METHOD is non-empty and nothing at all otherwise; the inner quotes keep
# the value itself as one word.
python3 -m sglang.launch_server \
  --model-path "$MODEL_ID" \
  --kv-cache-dtype "$KV_CACHE_DTYPE" \
  --tensor-parallel-size "$TP_SIZE" \
  --expert-parallel-size "$TP_SIZE" \
  ${QUANT_METHOD:+--quantization "$QUANT_METHOD"} \
  --enable-torch-compile \
  --enable-ep-moe \
  --tool-call-parser qwen25 \
  --host 0.0.0.0 \
  --port 80