#!/bin/bash
# Direct vLLM serving script for the DeepSWE-Preview-FP8 model.
#
# Serves the local /home/op/DeepSWE-Preview-FP8 checkpoint through vLLM's
# OpenAI-compatible API server on 0.0.0.0:8550, using 2-way tensor
# parallelism on GPUs 1 and 2.

# Strict mode. -u is deliberately deferred until after 'conda activate':
# conda's activation scripts are known to reference unset variables and
# would abort under 'set -u'.
set -eo pipefail

# Pin the GPUs vLLM may use; --tensor-parallel-size below must match the count.
export CUDA_VISIBLE_DEVICES=1,2

# Source conda and activate the vllm-model environment. Fail loudly here
# rather than silently launching vLLM against the wrong Python environment.
CONDA_SH=/home/op/miniconda3/etc/profile.d/conda.sh
if [[ ! -f "$CONDA_SH" ]]; then
  echo "error: conda setup script not found: $CONDA_SH" >&2
  exit 1
fi
# shellcheck disable=SC1090 — fixed path, verified to exist above
source "$CONDA_SH"
conda activate vllm-model
set -u

# NOTE: pyparsing must be present in the env (needed by pydot); its absence
# previously caused warnings about a missing pyparsing module.

# Run vLLM with all specified parameters. 'exec' replaces this shell so the
# server owns the PID and receives signals (e.g. from systemd/supervisors)
# directly.
exec python -m vllm.entrypoints.openai.api_server \
  --host 0.0.0.0 \
  --port 8550 \
  --model /home/op/DeepSWE-Preview-FP8 \
  --max-model-len 28000 \
  --tensor-parallel-size 2 \
  --trust-remote-code