File size: 1,105 Bytes
447d423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
FROM python:3.10-slim

# Create the unprivileged account up front; the switch to it happens only
# after the root-only package installation below.
RUN useradd -m -u 1000 user

WORKDIR /app

# Install Tesseract OCR and the required system libraries as root.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Everything from here on (pip installs, NLTK data, the app itself) runs as
# the non-root user.
USER user

# Put the user's local bin directory on PATH *before* the pip installs.
# Presumably pip falls back to a per-user install under /home/user/.local when
# run as this user (TODO confirm); declaring PATH here avoids pip's
# "script is installed in a directory not on PATH" warnings and keeps console
# scripts such as uvicorn resolvable both during the build and at runtime.
ENV PATH="/home/user/.local/bin:${PATH}"

# Copy only requirements.txt first so the dependency layer stays cached until
# the requirements themselves change.
COPY --chown=user Backend/requirements.txt .

# Install Python dependencies (including nltk and langdetect).
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# (Re)install nltk, download the punkt_tab tokenizer data as the user, and
# verify it in the SAME layer: nltk.data.find raises LookupError when the
# resource is missing, so a failed download aborts the build instead of being
# cached as a "successful" layer. The resolved path is printed in the build log.
# NOTE(review): --force-reinstall also reinstalls nltk's dependencies and may
# override versions pinned in requirements.txt — confirm this is intended.
RUN pip install --no-cache-dir --upgrade --force-reinstall nltk && \
    python -m nltk.downloader punkt_tab && \
    python -c "import nltk; print(nltk.data.find('tokenizers/punkt_tab'))"

# Copy all backend code last, so source edits do not invalidate the dependency
# layers above.
COPY --chown=user Backend/ .

# Documentation only: the port uvicorn listens on (see CMD).
EXPOSE 7860

# Exec form so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]