FROM python:3.10-slim
# Install system dependencies for building llama.cpp
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy the requirements file first so the dependency layer is cached
COPY requirements.txt .
# Install Python dependencies (llama-cpp-python compiled with BLAS disabled for HF CPU Spaces)
RUN CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF" \
    pip install --no-cache-dir -r requirements.txt
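# A plausible requirements.txt for this image (an assumption; the actual
# file is not shown here, these are the packages the steps above and the
# CMD below imply):
#   fastapi
#   uvicorn[standard]
#   llama-cpp-python
#   huggingface_hub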
# Copy app files
COPY . .
# Pre-download the model at build time to speed up startup. Note: the
# original used cache_dir, but local_dir_use_symlinks only takes effect
# with local_dir, which also yields a predictable file path.
RUN python -c "from huggingface_hub import hf_hub_download; \
    hf_hub_download(repo_id='bartowski/Llama-3.2-3B-Instruct-GGUF', \
    filename='Llama-3.2-3B-Instruct-Q4_K_M.gguf', \
    local_dir='/app/models', local_dir_use_symlinks=False)"
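# With local_dir, the weights land at a fixed location the app can hardcode:
# /app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf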
EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
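The CMD above expects an app.py at /app defining a FastAPI instance named app. That file is not shown in the Space; the following is a minimal sketch of what it might look like, assuming the model path produced by the pre-download step and a single illustrative /generate endpoint (all endpoint and field names here are assumptions, not the Space's actual API):

# app.py -- minimal sketch, assuming the layout built by the Dockerfile above
from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

# Fixed path written by the build-time hf_hub_download step
MODEL_PATH = "/app/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf"

app = FastAPI()
# Load the GGUF model once at startup; n_ctx is an arbitrary example value
llm = Llama(model_path=MODEL_PATH, n_ctx=2048)

class Prompt(BaseModel):
    text: str
    max_tokens: int = 128

@app.post("/generate")
def generate(prompt: Prompt):
    # create_completion returns a dict shaped like the OpenAI completion schema
    result = llm.create_completion(prompt.text, max_tokens=prompt.max_tokens)
    return {"completion": result["choices"][0]["text"]}

Loading the model at module import time (rather than per request) matters here: uvicorn imports app.py once, so the multi-second GGUF load cost is paid at container start instead of on every call.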