# Backend image: Python 3.10 (slim) with Tesseract OCR, served by uvicorn on port 7860.
FROM python:3.10-slim

# Create the unprivileged runtime user (UID 1000) and give it ownership of the
# application directory. WORKDIR alone would create /app owned by root, which
# leaves the non-root process unable to create files in its own working directory.
RUN useradd -m -u 1000 user && \
    mkdir -p /app && \
    chown user:user /app
WORKDIR /app

# Install Tesseract OCR and the shared libraries it is shipped with here, as root.
# update + install + list cleanup stay in one layer so no stale apt index is cached
# and the package lists never reach the final image.
# NOTE(review): libglib/libsm/libxext/libxrender are presumably required by an
# imaging dependency in requirements.txt -- confirm. libxrender-dev is a
# development package; the runtime library alone may be enough -- verify before swapping.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Everything below runs as the non-root user.
USER user

# Console scripts of user-installed Python packages live in /home/user/.local/bin.
# Set PATH before the pip steps so those scripts are resolvable during the build
# as well as at runtime (CMD relies on `uvicorn` being found here).
ENV PATH="/home/user/.local/bin:${PATH}"

# Copy only the dependency manifest first so the dependency layer stays cached
# until requirements.txt itself changes.
COPY --chown=user Backend/requirements.txt .

# Install Python dependencies (including nltk and langdetect).
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Reinstall nltk cleanly, download the punkt_tab tokenizer data as the runtime
# user, and verify it in the same layer: nltk.data.find raises if the resource is
# missing, which fails the build; on success the resolved path is printed to the log.
# NOTE(review): --force-reinstall without a version may replace the nltk version
# selected by requirements.txt -- confirm this is intended.
RUN pip install --no-cache-dir --upgrade --force-reinstall nltk && \
    python -m nltk.downloader punkt_tab && \
    python -c "import nltk; print(nltk.data.find('tokenizers/punkt_tab'))"

# Copy the backend source last; it changes most often.
COPY --chown=user Backend/ .

# Port the server listens on (documentation only; does not publish the port).
EXPOSE 7860

# Exec form so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]