# Container image for the llama.cpp HTTP server (CUDA build), preconfigured to
# fetch and serve the Qwen2.5-Coder-14B Q4_K_M GGUF model on port 7860 while
# running as UID 1000 instead of root.
#
# The LLAMA_ARG_* variables are the environment-variable form of llama-server's
# command-line options; see the llama.cpp server README for each one's meaning.
#
# NOTE(review): the llama.cpp repository now appears to live under the ggml-org
# organisation - confirm that this ggerganov/ image path still receives updates.
FROM ghcr.io/ggerganov/llama.cpp:server-cuda

# --- Inference settings ------------------------------------------------------
# NOTE(review): presumably the context size is shared between the parallel
# slots, so each of the 2 slots gets only part of the 10000 tokens - confirm.
ENV LLAMA_ARG_CTX_SIZE=10000
ENV LLAMA_ARG_FLASH_ATTN=1
ENV LLAMA_ARG_N_PARALLEL=2

# --- Model source: Hugging Face repository and the file inside it ------------
ENV LLAMA_ARG_HF_REPO=bartowski/Qwen2.5-Coder-14B-GGUF
ENV LLAMA_ARG_HF_FILE=Qwen2.5-Coder-14B-Q4_K_M.gguf

# --- Network binding ---------------------------------------------------------
# 0.0.0.0 binds every interface so the server is reachable from outside the
# container.
# NOTE(review): 7860 is the default application port on Hugging Face Spaces -
# presumably the deployment target; confirm.
ENV LLAMA_ARG_HOST=0.0.0.0
ENV LLAMA_ARG_PORT=7860

# --- Unprivileged runtime user -----------------------------------------------
# Create the "user" account only when UID 1000 is still free: useradd aborts
# the build with "UID 1000 is not unique" if the base image already ships an
# account with that UID. Selecting the user by numeric UID below works whether
# the account was created here or was already present in the base image.
RUN getent passwd 1000 >/dev/null || useradd -m -u 1000 user
USER 1000