hfendpoints-images
/

text-generation-sglang-gpu

Model card Files Files and versions

text-generation-sglang-gpu / entrypoint.sh

Morgan Funtowicz

misc: add semicolon to separate statements

2357c2a 7 months ago

history blame contribute delete

771 Bytes

	#!/bin/bash
	#
	# Entrypoint: launches the SGLang server (`python3 -m sglang.launch_server`)
	# listening on 0.0.0.0:80.
	#
	# Environment variables read by this script:
	#   MODEL_ID        (required) value passed to --model-path
	#   KV_CACHE_DTYPE  (required) value passed to --kv-cache-dtype
	#   TP_SIZE         (required) value passed to BOTH --tensor-parallel-size
	#                   and --expert-parallel-size
	#   QUANT_METHOD    (optional) when non-empty, passed to --quantization;
	#                   when unset or empty, no --quantization flag is passed

	set -euo pipefail

	# Fail fast with a clear message. Without these checks, an unset variable
	# would leave a flag without its value and the server would see the next
	# flag name as that value.
	: "${MODEL_ID:?MODEL_ID must be set (used for --model-path)}"
	: "${KV_CACHE_DTYPE:?KV_CACHE_DTYPE must be set (used for --kv-cache-dtype)}"
	: "${TP_SIZE:?TP_SIZE must be set (used for tensor/expert parallel size)}"

	# Build the argument list once, as an array, so each value stays a single
	# word regardless of whitespace or glob characters it may contain.
	server_args=(
	  --model-path "$MODEL_ID"
	  --kv-cache-dtype "$KV_CACHE_DTYPE"
	  --tensor-parallel-size "$TP_SIZE"
	  --expert-parallel-size "$TP_SIZE"
	)

	# The quantization flag is the only difference between the two launch modes.
	if [[ -n "${QUANT_METHOD:-}" ]]; then
	  echo "Using ${QUANT_METHOD} quantization schema"
	  server_args+=(--quantization "$QUANT_METHOD")
	else
	  echo "Using native precision"
	fi

	server_args+=(
	  --enable-torch-compile
	  --enable-ep-moe
	  --tool-call-parser qwen25
	  --host 0.0.0.0
	  --port 80
	)

	# exec replaces this shell with the server process, so signals sent to the
	# script's PID reach the server directly and its exit status becomes the
	# script's exit status.
	exec python3 -m sglang.launch_server "${server_args[@]}"