Kalpokoch committed on
Commit
4ab2b56
·
verified ·
1 Parent(s): 28a782e

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +17 -20
Dockerfile CHANGED
@@ -1,39 +1,36 @@
1
- # Use official Python image with basic system utilities
2
  FROM python:3.11-slim
3
 
4
- # Install system dependencies
5
  RUN apt-get update && apt-get install -y \
6
- wget \
7
- build-essential \
8
  && rm -rf /var/lib/apt/lists/*
9
 
10
  # Set working directory
11
  WORKDIR /app
12
 
13
- # Set HF cache dir for transformers, chromadb, etc.
14
- ENV TRANSFORMERS_CACHE=/app/.cache \
15
- HF_HOME=/app/.cache \
16
- PIP_NO_CACHE_DIR=true
17
 
18
- # Create required dirs with open permissions
19
- RUN mkdir -p /app/.cache /app/vector_database && chmod -R 777 /app/.cache /app/vector_database
 
20
 
21
- # Pre-download your quantized GGUF model from HF
22
- # (Replace with your exact file path if different)
23
- RUN wget https://huggingface.co/Kalpokoch/FinetunedQuantizedTinyLama/resolve/main/tinyllama_dop_q4_k_m.gguf -O /app/tinyllama_dop_q4_k_m.gguf
24
 
25
- # Install Python dependencies separately to leverage Docker caching
26
  COPY requirements.txt .
27
  RUN pip install -r requirements.txt
28
 
29
- # Install llama-cpp-python from wheel (avoid compilation)
30
- RUN pip install llama-cpp-python --prefer-binary
31
-
32
- # Copy the app code
33
  COPY . .
34
 
35
- # Expose port for FastAPI
 
 
 
 
36
  EXPOSE 7860
37
 
38
- # Launch app
39
  CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
# syntax=docker/dockerfile:1
FROM python:3.11-slim

# System dependencies: build-essential/cmake in case llama-cpp-python must
# compile from source, git for VCS-based pip requirements, curl for the model
# download below. --no-install-recommends + list cleanup keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Set working directory (created automatically if missing)
WORKDIR /app

# Create cache and vector DB dirs. HF Spaces containers may run with an
# arbitrary non-root UID, so these writable dirs need open permissions —
# but only these dirs, not all of /app (least privilege; at this point
# /app contains nothing else anyway).
RUN mkdir -p /app/.cache /app/vector_database \
    && chmod -R 777 /app/.cache /app/vector_database

# Point transformers / huggingface_hub caches at the writable dir
ENV TRANSFORMERS_CACHE=/app/.cache \
    HF_HOME=/app/.cache

# Preinstall llama-cpp-python pinned to a known working wheel version
# (avoids a long source build); --no-cache-dir keeps the pip cache out of
# the image layer.
RUN pip install --no-cache-dir llama-cpp-python==0.2.61

# Install remaining Python dependencies before copying the source tree so
# this layer is reused whenever only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download the quantized GGUF model at build time, BEFORE copying the app
# code, so source-only changes do not re-trigger the large download.
# -f makes curl fail the build on HTTP errors instead of silently saving
# an error page as the model file; -L follows the HF redirect to the CDN.
RUN curl -fL -o /app/tinyllama_dop_q4_k_m.gguf \
    https://huggingface.co/Kalpokoch/FinetunedQuantizedTinyLama/resolve/main/tinyllama_dop_q4_k_m.gguf

# Copy app code (most frequently changing content goes last)
COPY . .

# Document the FastAPI service port (EXPOSE does not publish it)
EXPOSE 7860

# Run the app (exec form: uvicorn is PID 1 and receives SIGTERM directly)
CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]