FROM python:3.10-slim
# Install system dependencies for building llama.cpp
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy the requirements file first so the dependency layer is cached
COPY requirements.txt .
# Install Python dependencies (llama-cpp-python compiled with BLAS disabled for HF CPU Spaces)
RUN CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF" \
    pip install --no-cache-dir -r requirements.txt
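# A plausible requirements.txt for this image (an assumption; the actual
# file is not shown here, these are the packages the steps above and the
# CMD below imply):
#   fastapi
#   uvicorn[standard]
#   llama-cpp-python
#   huggingface_hub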
# Copy app files
COPY . .
# Pre-download the model at build time to speed up startup. Note: the
# original used cache_dir, but local_dir_use_symlinks only takes effect
# with local_dir, which also yields a predictable file path.
RUN python -c "from huggingface_hub import hf_hub_download; \
    hf_hub_download(repo_id='bartowski/Llama-3.2-3B-Instruct-GGUF', \
    filename='Llama-3.2-3B-Instruct-Q4_K_M.gguf', \
    local_dir='/app/models', local_dir_use_symlinks=False)"
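# With local_dir, the weights land at a fixed location the app can hardcode:
# /app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf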
EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
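The CMD above expects an app.py at /app defining a FastAPI instance named app. That file is not shown in the Space; the following is a minimal sketch of what it might look like, assuming the model path produced by the pre-download step and a single illustrative /generate endpoint (all endpoint and field names here are assumptions, not the Space's actual API):

# app.py -- minimal sketch, assuming the layout built by the Dockerfile above
from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

# Fixed path written by the build-time hf_hub_download step
MODEL_PATH = "/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf"

app = FastAPI()
# Load the GGUF model once at startup; n_ctx is an arbitrary example value
llm = Llama(model_path=MODEL_PATH, n_ctx=2048)

class Prompt(BaseModel):
    text: str
    max_tokens: int = 128

@app.post("/generate")
def generate(prompt: Prompt):
    # create_completion returns a dict shaped like the OpenAI completion schema
    result = llm.create_completion(prompt.text, max_tokens=prompt.max_tokens)
    return {"completion": result["choices"][0]["text"]}

Loading the model at module import time (rather than per request) matters here: uvicorn imports app.py once, so the multi-second GGUF load cost is paid at container start instead of on every call.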