# Use a lightweight Python base
FROM python:3.10-slim
WORKDIR /app
# Install system dependencies (g++ is needed below to compile the engine;
# the slim image does not ship a compiler)
RUN apt-get update && apt-get install -y \
    g++ \
    libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*
# Install llama-cpp-python from a PREBUILT wheel (~3 seconds vs 10+ minutes building from source)
RUN pip install \
    https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl
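# Optional sanity check that the prebuilt wheel imports correctly
# (an extra step, not required by the build; uncomment to enable):
# RUN python -c "import llama_cpp; print(llama_cpp.__version__)"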
# Copy requirements first to take advantage of Docker layer caching
COPY requirements.txt .
# Install the remaining requirements
RUN pip install -r requirements.txt
# Copy project files
COPY . .
# Set environment variables for compilation.
# We need to know where pip installed llama-cpp-python so we can link against it.
# In this image (python:3.10-slim), packages land in /usr/local/lib/python3.10/site-packages.
ENV SITE_PACKAGES=/usr/local/lib/python3.10/site-packages
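# If the base image ever changes, the path can be discovered instead of
# hard-coded (a sketch, assuming the standard CPython sysconfig layout):
# RUN python -c "import sysconfig; print(sysconfig.get_paths()['purelib'])"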
# Compile the engine
WORKDIR /app/engine
RUN g++ -O2 -shared -fPIC -o libbatch.so batch_server.cpp \
    -I"${SITE_PACKAGES}/include" \
    -L"${SITE_PACKAGES}/llama_cpp/lib" \
    -lllama -Wl,-rpath,"${SITE_PACKAGES}/llama_cpp/lib"
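# Optional check that libbatch.so loads and resolves its llama dependency
# via the rpath set above (illustrative, not part of the original build):
# RUN python -c "import ctypes; ctypes.CDLL('/app/engine/libbatch.so')"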
# Model setup: best practice is to download the model at build time if it is
# small, otherwise mount it as a volume or fetch it at runtime with a setup
# script. For the "Tunnel Code Optimized" setup we assume a valid model is
# already present, mounted, or downloaded by the exposed setup script.
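# A hedged sketch of a build-time download (the URL and filename below are
# placeholders, and wget would also need to be added to the apt-get install):
# RUN mkdir -p /app/models && \
#     wget -O /app/models/model.gguf "https://example.com/model.gguf"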
WORKDIR /app
EXPOSE 8000
# Start command
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
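# Example usage (the image tag and mount path are illustrative):
#   docker build -t llama-batch .
#   docker run -p 8000:8000 -v "$(pwd)/models:/app/models" llama-batch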