# DeepSWE-Preview-FP8 / serve_vllm.sh
# Uploaded by groxaxo using huggingface_hub (commit f0d12fa, verified)
#!/bin/bash
# Direct vLLM serving script for the DeepSWE-Preview-FP8 model.
#
# Starts an OpenAI-compatible vLLM API server on port 8550, sharded
# across two GPUs via tensor parallelism.
#
# Requires: miniconda at /home/op/miniconda3 with a 'vllm-model' env,
#           model weights at /home/op/DeepSWE-Preview-FP8.

# Fail fast: abort on errors, unset variables, and pipeline failures.
set -euo pipefail

# Restrict this process to GPUs 1 and 2 (matches --tensor-parallel-size 2).
export CUDA_VISIBLE_DEVICES=1,2

# Source conda so 'conda activate' works in this non-interactive shell.
source /home/op/miniconda3/etc/profile.d/conda.sh

# Activate the vllm-model environment.
conda activate vllm-model

# NOTE(review): pyparsing (needed by pydot) previously caused missing-module
# warnings; assumed installed in the env — confirm if warnings reappear.

# Launch the server. BUGFIX: the original ended each line with '\\', which
# bash parses as an escaped literal backslash — NOT a line continuation —
# so the flags below were never passed to the server. Single '\' is correct.
python -m vllm.entrypoints.openai.api_server \
  --host 0.0.0.0 \
  --port 8550 \
  --model /home/op/DeepSWE-Preview-FP8 \
  --max-model-len 28000 \
  --tensor-parallel-size 2 \
  --trust-remote-code