# syntax=docker/dockerfile:1
# Dockerfile — FastAPI app served by uvicorn, with llama.cpp compiled from
# source via pip (llama-cpp-python builds during dependency install).
FROM python:3.11-slim

# Runtime-relevant env only. The llama.cpp build flags are scoped to the
# pip RUN step below so they do not persist into the container's runtime
# environment (`docker inspect` env).
ENV PYTHONUNBUFFERED=1

# System deps for llama.cpp compilation.
# NOTE(review): build-essential/cmake/git remain in the final image; a
# multi-stage build (compile wheels, copy site-packages) would shrink it —
# confirm image-size requirements before restructuring.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the dependency manifest first so this expensive compile layer is
# cached until requirements.txt changes (source edits won't rebuild llama.cpp).
COPY requirements.txt .

# CMAKE_ARGS/FORCE_CMAKE are build-time-only: set them inline on the RUN line
# instead of ENV so they vanish after this layer.
# NOTE(review): newer llama.cpp versions renamed these flags with a GGML_
# prefix (e.g. -DGGML_AVX2=ON) — confirm against the llama-cpp-python version
# pinned in requirements.txt.
RUN CMAKE_ARGS="-DLLAMA_AVX2=ON -DLLAMA_AVX=ON -DLLAMA_FMA=ON" \
    FORCE_CMAKE=1 \
    pip install --no-cache-dir -r requirements.txt

# Application code last — changes here never invalidate the deps layer above.
COPY app/ ./app/

# Drop root: create an unprivileged system user and hand it the app dir.
# Port 7860 is unprivileged (>1024), so binding works without extra caps.
RUN groupadd --system app \
    && useradd --system --gid app --home /app app \
    && chown -R app:app /app
USER app

# Documentation only — the port is published at `docker run -p`.
EXPOSE 7860

# Exec form: uvicorn runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]