#!/usr/bin/env bash
# Direct vLLM serving script for the DeepSWE-Preview-FP8 model.
#
# Launches the vLLM OpenAI-compatible API server on port 8550, using
# tensor parallelism across two GPUs (devices 1 and 2).

set -euo pipefail

# Restrict the server to GPUs 1 and 2.
export CUDA_VISIBLE_DEVICES=1,2

# Source conda and activate the vllm-model environment. Under `set -e`,
# a failed activation aborts the script instead of launching the server
# in the wrong environment.
# shellcheck disable=SC1091 — conda.sh lives outside this repo
source /home/op/miniconda3/etc/profile.d/conda.sh
conda activate vllm-model

# pyparsing is needed by pydot; its absence previously caused warnings
# at startup. Check for it and warn (non-fatally) if missing.
python -c 'import pyparsing' >/dev/null 2>&1 \
  || echo "warning: pyparsing not installed (pydot may emit warnings)" >&2

# Run vLLM with all specified parameters.
python -m vllm.entrypoints.openai.api_server \
  --host 0.0.0.0 \
  --port 8550 \
  --model /home/op/DeepSWE-Preview-FP8 \
  --max-model-len 28000 \
  --tensor-parallel-size 2 \
  --trust-remote-code