Spaces:
Sleeping
Sleeping
# Base image: CUDA 12.8.1 development toolchain on Ubuntu 24.04.
FROM nvidia/cuda:12.8.1-devel-ubuntu24.04

# Relative paths in later instructions resolve against /app.
WORKDIR /app

# Build-time switch for the llama.cpp step: the build runs only when this is
# exactly "false"; any other value unpacks the archive without compiling.
ARG SKIP_LLAMA_BUILD=false
# System packages: C/C++ build toolchain, CLI utilities and Python tooling.
# DEBIAN_FRONTEND is set inline so prompts are suppressed for this command
# only and the variable is not persisted in the image environment.
# The apt lists are removed in the same layer, and `python` is symlinked to
# python3 for the steps that follow.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        cmake \
        curl \
        git \
        lsof \
        pipx \
        python3-full \
        python3-pip \
        python3-poetry \
        python3-venv \
        sqlite3 \
        unzip \
        vim \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python
# Create a virtual environment; installing into it avoids the PEP 668
# "externally managed environment" restriction on the system interpreter.
RUN python -m venv /app/venv

# Put the venv first on PATH so `python`, `pip` and `poetry` in later steps
# (and at container runtime) resolve to the venv's executables.
ENV PATH="/app/venv/bin:$PATH" \
    VIRTUAL_ENV="/app/venv"

# Install Poetry into the venv and configure it not to create a virtualenv of
# its own. --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir poetry \
    && poetry config virtualenvs.create false
# Directory skeleton for vendored dependencies, data stores, logs, runtime
# files and resources.
RUN mkdir -p \
        /app/dependencies \
        /app/data/sqlite \
        /app/data/chroma_db \
        /app/logs \
        /app/run \
        /app/resources

# Vendored archives (rarely change, so they are copied early for caching).
COPY dependencies/graphrag-1.2.1.dev27.tar.gz dependencies/llama.cpp.zip /app/dependencies/

# GPU detection helpers; the shell script must be executable.
COPY docker/app/check_gpu_support.sh docker/app/check_torch_cuda.py /app/
RUN chmod +x /app/check_gpu_support.sh
# Unpack the vendored llama.cpp archive and, unless SKIP_LLAMA_BUILD is set to
# something other than "false", compile it.
#
# ENABLE_CUDA selects the ggml CUDA backend (ON/OFF). It defaults to OFF, which
# is what this step has always passed to CMake; the log messages now report the
# actual setting instead of unconditionally announcing a CUDA build.
# Pass `--build-arg ENABLE_CUDA=ON` to compile the CUDA backend.
#
# NOTE(review): -DLLAMA_NATIVE=ON tunes the binaries for the build host's CPU;
# confirm the runtime hosts support the same instruction set.
ARG ENABLE_CUDA=OFF
RUN set -e; \
    echo "Using local llama.cpp archive..."; \
    unzip -q dependencies/llama.cpp.zip; \
    mkdir -p llama.cpp/build; \
    if [ "$SKIP_LLAMA_BUILD" = "false" ]; then \
        echo "====================================================================="; \
        echo "STARTING LLAMA.CPP BUILD (GGML_CUDA=$ENABLE_CUDA) - THIS WILL TAKE SOME TIME"; \
        echo "====================================================================="; \
        echo "Starting CMake configuration (GGML_CUDA=$ENABLE_CUDA)..."; \
        cmake -S llama.cpp -B llama.cpp/build \
            -DGGML_CUDA="$ENABLE_CUDA" \
            -DLLAMA_CUBLAS=OFF \
            -DCMAKE_BUILD_TYPE=Release \
            -DBUILD_SHARED_LIBS=OFF \
            -DLLAMA_NATIVE=ON; \
        echo "Starting build process (this will take several minutes)..."; \
        # Bound parallelism to the CPU count; a bare -j lets make spawn unlimited jobs.
        cmake --build llama.cpp/build --config Release -j "$(nproc)" --verbose; \
        echo "Build completed successfully"; \
        chmod +x /app/llama.cpp/build/bin/llama-server /app/llama.cpp/build/bin/llama-cli; \
        echo "===================================================================="; \
        echo "LLAMA.CPP BUILD COMPLETED SUCCESSFULLY (GGML_CUDA=$ENABLE_CUDA)"; \
        echo "===================================================================="; \
    else \
        echo "====================================================================="; \
        echo "SKIPPING LLAMA.CPP BUILD (SKIP_LLAMA_BUILD=$SKIP_LLAMA_BUILD)"; \
        echo "Using existing llama.cpp build from Docker volume"; \
        echo "Archive unpacked only (no build)"; \
        echo "====================================================================="; \
    fi
# Write a marker file for runtime reference, stamped with the UTC build time.
#
# printf with an unquoted date format is used because escaped quotes inside
# "$(date ... +\"...\")" are passed to `date` literally, which wraps the
# timestamp in a second pair of quotes and produces invalid JSON.
#
# NOTE(review): "gpu_optimized" is written as true unconditionally; it does not
# reflect how llama.cpp was actually configured in this image.
RUN mkdir -p /app/data \
    && printf '{ "gpu_optimized": true, "optimized_on": "%s" }\n' \
        "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > /app/data/gpu_optimized.json \
    && echo "Created GPU-optimized marker file"
# Project metadata changes only occasionally; it is copied before the source
# tree so the dependency layers below stay cached across source-only edits.
COPY pyproject.toml README.md /app/

# Refresh the packaging toolchain before resolving project dependencies.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade setuptools wheel

# Install dependencies only (--no-root). If the full install fails, retry
# without the dev dependency group.
RUN poetry install --no-interaction --no-root \
    || poetry install --no-interaction --no-root --without dev

# Install the vendored graphrag archive; --force-reinstall makes pip install
# it even when a graphrag distribution is already present.
RUN pip install --no-cache-dir --force-reinstall dependencies/graphrag-1.2.1.dev27.tar.gz
# Application sources change most often, so they are copied last.
COPY docker/ /app/docker/
COPY lpm_kernel/ /app/lpm_kernel/

# Fail the build early if the package cannot be imported.
RUN python -c "import lpm_kernel; print('Module import check passed')"
# Runtime configuration: application paths, Flask entry module, unbuffered
# Python output, and the CUDA libraries prepended to the loader search path.
ENV APP_ROOT=/app \
    BASE_DIR=/app/data \
    FLASK_APP=lpm_kernel.app \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
    LOCAL_LOG_DIR=/app/logs \
    PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    RESOURCES_DIR=/app/resources \
    RUN_DIR=/app/run

# Ports the container is expected to listen on (documentation only).
EXPOSE 8002 8080
# Startup command:
#   1. Initialise the SQLite database from /app/docker/sqlite/init.sql when the
#      database file is missing or empty.
#   2. Initialise ChromaDB when either collection directory is missing.
#   3. Start Flask on 0.0.0.0:${LOCAL_APP_PORT:-8002}, appending its output to
#      /app/logs/backend.log.
# The server is started with `exec` so it replaces the bash wrapper and
# receives container stop signals (SIGTERM) directly.
# The command is kept on a single line so the JSON (exec-form) array stays
# parseable.
CMD ["bash", "-c", "echo 'Checking SQLite database...' && if [ ! -s /app/data/sqlite/lpm.db ]; then echo 'SQLite database not found or empty, initializing...' && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db '.read /app/docker/sqlite/init.sql' && echo 'SQLite database initialized successfully' && echo 'Tables created:' && sqlite3 /app/data/sqlite/lpm.db '.tables'; else echo 'SQLite database already exists, skipping initialization'; fi && echo 'Checking ChromaDB...' && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo 'ChromaDB collections not found, initializing...' && python /app/docker/app/init_chroma.py && echo 'ChromaDB initialized successfully'; else echo 'ChromaDB already exists, skipping initialization'; fi && echo 'Starting application at ' $(date) >> /app/logs/backend.log && cd /app && exec python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]