#!/bin/bash
# Direct vLLM serving script for the DeepSWE-Preview-FP8 model.
#
# Serves the local /home/op/DeepSWE-Preview-FP8 checkpoint through vLLM's
# OpenAI-compatible API server on 0.0.0.0:8550, using 2-way tensor
# parallelism on GPUs 1 and 2.

# Strict mode. -u is deliberately deferred until after 'conda activate':
# conda's activation scripts are known to reference unset variables and
# would abort under 'set -u'.
set -eo pipefail

# Pin the GPUs vLLM may use; --tensor-parallel-size below must match the count.
export CUDA_VISIBLE_DEVICES=1,2

# Source conda and activate the vllm-model environment. Fail loudly here
# rather than silently launching vLLM against the wrong Python environment.
CONDA_SH=/home/op/miniconda3/etc/profile.d/conda.sh
if [[ ! -f "$CONDA_SH" ]]; then
  echo "error: conda setup script not found: $CONDA_SH" >&2
  exit 1
fi
# shellcheck disable=SC1090 — fixed path, verified to exist above
source "$CONDA_SH"
conda activate vllm-model
set -u

# NOTE: pyparsing must be present in the env (needed by pydot); its absence
# previously caused warnings about a missing pyparsing module.

# Run vLLM with all specified parameters. 'exec' replaces this shell so the
# server owns the PID and receives signals (e.g. from systemd/supervisors)
# directly.
exec python -m vllm.entrypoints.openai.api_server \
  --host 0.0.0.0 \
  --port 8550 \
  --model /home/op/DeepSWE-Preview-FP8 \
  --max-model-len 28000 \
  --tensor-parallel-size 2 \
  --trust-remote-code