euIaxs22 committed on
Commit
f793dc0
·
verified ·
1 Parent(s): cd95591

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +150 -0
  2. start.sh +155 -0
Dockerfile ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # ADUC-SDR Video Suite — High-Perf Diffusers for 8× L40S (SM 8.9)
3
+ # CUDA 12.8 | PyTorch 2.8.0+cu128 | Ubuntu 22.04
4
+ # =============================================================================
5
+ FROM nvidia/cuda:12.8.0-devel-ubuntu22.04
6
+
7
+ LABEL maintainer="Carlos Rodrigues dos Santos & Development Partner"
8
+ LABEL description="High-performance Diffusers stack with FA2/SDPA, 8×L40S"
9
+ LABEL version="4.4.0"
10
+ LABEL cuda_version="12.8.0"
11
+ LABEL python_version="3.10"
12
+ LABEL pytorch_version="2.8.0+cu128"
13
+ LABEL gpu_optimized_for="8x_NVIDIA_L40S"
14
+
15
+ # ---------------- Core env & caches ----------------
16
+ ENV DEBIAN_FRONTEND=noninteractive TZ=UTC LANG=C.UTF-8 LC_ALL=C.UTF-8 \
17
+ PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=1 \
18
+ PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
19
+
20
+ # GPU/Compute
21
+ ENV NVIDIA_VISIBLE_DEVICES=all
22
+ ENV CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
23
+ ENV TORCH_CUDA_ARCH_LIST="8.9"
24
+ ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
25
+ ENV CUDA_DEVICE_MAX_CONNECTIONS=32
26
+ ENV CUDA_MODULE_LOADING=LAZY
27
+
28
+ # Threads
29
+ ENV OMP_NUM_THREADS=8 MKL_NUM_THREADS=8 MAX_JOBS=48
30
+
31
+ # Alloc/caches
32
+ ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,garbage_collection_threshold:0.8
33
+ ENV CUDA_LAUNCH_BLOCKING=0 CUDA_CACHE_MAXSIZE=2147483648 CUDA_CACHE_DISABLE=0
34
+
35
+ # NCCL single-node sane defaults (use IB=1/IFNAME em clusters com IB)
36
+ ENV NCCL_DEBUG=INFO NCCL_ASYNC_ERROR_HANDLING=1 NCCL_P2P_DISABLE=0 NCCL_IB_DISABLE=1 \
37
+ NCCL_MIN_NCHANNELS=8 NCCL_NTHREADS=256 NCCL_SOCKET_IFNAME=lo
38
+
39
+ # Hugging Face caches
40
+ ENV APP_HOME=/app
41
+ WORKDIR $APP_HOME
42
+ ENV TORCH_HOME=/app/.cache/torch
43
+ ENV HF_HOME=/app/.cache/huggingface
44
+ ENV HF_DATASETS_CACHE=/app/.cache/datasets
45
+ ENV TRANSFORMERS_CACHE=/app/.cache/transformers
46
+ ENV DIFFUSERS_CACHE=/app/.cache/diffusers
47
+ ENV HF_HUB_ENABLE_HF_TRANSFER=1
48
+ ENV TOKENIZERS_PARALLELISM=false
49
+
50
+ # FlashAttention / SDPA defaults
51
+ ENV FLASH_ATTENTION_DISABLE=0 \
52
+ FLASH_ATTENTION_FORCE_FP16=1 \
53
+ ATTN_FORCE_F16=1 \
54
+ ENABLE_FLASH_SDP=1 \
55
+ ENABLE_MEMORY_EFFICIENT_SDP=1 \
56
+ ENABLE_MATH_SDP=0 \
57
+ XFORMERS_FORCE_DISABLE=1 \
58
+ TORCH_DTYPE=bfloat16 \
59
+ NVIDIA_TF32_OVERRIDE=0 \
60
+ FA_LOG_LEVEL=WARNING
61
+
62
+ # Link de modelos
63
+
64
+
65
+ ENV MODELS_DIR=/app/models
66
+ RUN mkdir -p /home/user/.cache/models && ln -sf /home/user/.cache/models /app/models
67
+
68
+ # ---------------- Sistema & Python ----------------
69
+ RUN apt-get update && apt-get install -y --no-install-recommends \
70
+ build-essential cmake git git-lfs curl wget ffmpeg ninja-build \
71
+ python3.10 python3.10-dev python3.10-distutils python3-pip \
72
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
73
+
74
+ RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \
75
+ ln -sf /usr/bin/python3.10 /usr/bin/python && \
76
+ python3 -m pip install --upgrade pip
77
+
78
+
79
+ # ---------------- PyTorch cu128 (pinado) ----------------
80
+ RUN pip install --index-url https://download.pytorch.org/whl/cu128 \
81
+ torch==2.8.0+cu128 torchvision==0.23.0+cu128 torchaudio==2.8.0+cu128
82
+
83
+ # ---------------- Toolchain, Triton, FA2 (sem bnb) ----------------
84
+ RUN pip install packaging ninja cmake pybind11 scikit-build cython hf_transfer numpy==1.24.4
85
+
86
+ # Triton 3.x (sem triton.ops)
87
+ RUN pip uninstall -y triton || true && \
88
+ pip install -v --no-build-isolation triton==3.4.0
89
+
90
+ # FlashAttention 2.8.x
91
+ #RUN pip install flash-attn==2.8.3 --no-build-isolation || \
92
+ # pip install flash-attn==2.8.2 --no-build-isolation || \
93
+ # pip install flash-attn==2.8.1 --no-build-isolation || \
94
+ # pip install flash-attn==2.8.0.post2 --no-build-isolation
95
+
96
+ # Diffusers/Transformers estáveis (sem dev)
97
+ RUN pip install --no-cache-dir diffusers==0.31.0 transformers==4.44.2 accelerate==0.34.2 omegaconf==2.3.0
98
+
99
+ # Opcional: seu fork de otimizações
100
+ # RUN pip install -U git+https://github.com/carlex22/diffusers-aduc-sdr
101
+
102
+ # ---------------- Repositórios auxiliares ----------------
103
+ RUN git clone https://github.com/bytedance-seed/VINCIE.git && \
104
+ cp -r VINCIE/configs/. /app/configs/
105
+
106
+ # Exemplos adicionais (descomente se precisar)
107
+ RUN git clone https://github.com/bytedance-seed/SeedVR.git && \
108
+ cp -r SeedVR/configs_3b /app/configs_3b
109
+
110
+ RUN git clone https://github.com/hkchengrex/MMAudio.git && \
111
+ cp -r MMAudio/mmaudio /app/mmaudio
112
+
113
+ RUN git clone https://github.com/Lightricks/LTX-Video.git && \
114
+ cp -r LTX-Video/ltx_video /app/ltx_video
115
+
116
+ # ---------------- Dependências da aplicação ----------------
117
+ COPY requirements.txt ./requirements.txt
118
+ RUN pip install --no-cache-dir -r requirements.txt
119
+
120
+
121
+ RUN pip install --upgrade bitsandbytes
122
+
123
+ # Scripts e configs
124
+ COPY info.sh ./info.sh
125
+ COPY start.sh ./start.sh
126
+ COPY /configs ./configs
127
+
128
+ # ---------------- Código e permissões ----------------
129
+ COPY . .
130
+ RUN useradd -m -u 1000 -s /bin/bash appuser && \
131
+ chown -R appuser:appuser /app && \
132
+ chmod 0755 /app/start.sh /app/info.sh || true
133
+
134
+ USER appuser
135
+
136
+ # Declara volume persistente para HF Spaces
137
+ VOLUME /data
138
+
139
+ # Env vars para caches em /data
140
+ ENV HF_HOME=/data/.cache/huggingface
141
+ ENV TORCH_HOME=/data/.cache/torch
142
+ ENV HF_DATASETS_CACHE=/data/.cache/datasets
143
+ ENV TRANSFORMERS_CACHE=/data/.cache/transformers
144
+ ENV DIFFUSERS_CACHE=/data/.cache/diffusers
145
+
146
+
147
+
148
+ # ---------------- Entry ----------------
149
+ ENTRYPOINT ["./start.sh"]
150
+ CMD ["gradio"]
start.sh ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# ADUC-SDR entrypoint: configures CUDA/NCCL/HF-cache env, downloads the
# VINCIE-3B checkpoint, optionally runs the Apex/Q8 builder, then launches
# the Gradio app. Required env: none (all have defaults); optional:
# HF_TOKEN, DIRECT_TO_CKPT, DISABLE_BUILDER, BUILDER_TIMEOUT_SEC.
set -euo pipefail

echo "======================================================="
echo " ADUC-SDR — Start (VINCIE/SeedVR, 8× L40S)"
echo "======================================================="

# ---------------------- Base env ----------------------
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3,4,5,6,7}"
export TORCH_DTYPE="${TORCH_DTYPE:-bfloat16}"

# SDPA / FlashAttention toggles
export ENABLE_FLASH_SDP="${ENABLE_FLASH_SDP:-1}"
export ENABLE_MEMORY_EFFICIENT_SDP="${ENABLE_MEMORY_EFFICIENT_SDP:-1}"
export ENABLE_MATH_SDP="${ENABLE_MATH_SDP:-0}"
export FLASH_ATTENTION_DISABLE="${FLASH_ATTENTION_DISABLE:-0}"
export XFORMERS_FORCE_DISABLE="${XFORMERS_FORCE_DISABLE:-1}"

# CUDA / NCCL baseline
export CUDA_MODULE_LOADING="LAZY"
export CUDA_DEVICE_MAX_CONNECTIONS="${CUDA_DEVICE_MAX_CONNECTIONS:-32}"
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:512,garbage_collection_threshold:0.8"
export OMP_NUM_THREADS="${OMP_NUM_THREADS:-8}"
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-8}"
export NCCL_DEBUG="INFO"
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_P2P_DISABLE=0
export NCCL_IB_DISABLE=1
export NCCL_SOCKET_IFNAME="lo"
export NCCL_BLOCKING_WAIT=1
export TORCH_NCCL_BLOCKING_WAIT=1
export NCCL_TIMEOUT=600

# ---------------------- HF/torch cache persistence ----------------------
# Prefer the /data volume when it exists (HF Spaces persistent storage),
# otherwise fall back to the in-image cache under /app.
if [ -d /data ]; then
  export HF_HOME="/data/.cache/huggingface"
  export TORCH_HOME="/data/.cache/torch"
else
  export HF_HOME="/app/.cache/huggingface"
  export TORCH_HOME="/app/.cache/torch"
fi
export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
mkdir -p "$HF_HUB_CACHE" "$TORCH_HOME"

# Keep /app/.cache/huggingface pointing at the active HF_HOME for tools
# that ignore the env var.
mkdir -p /app/.cache
ln -sf "$HF_HOME" /app/.cache/huggingface
# TRANSFORMERS_CACHE is deprecated and would shadow HF_HOME — drop it.
unset TRANSFORMERS_CACHE

export HF_HUB_ENABLE_HF_TRANSFER=1
export HF_HUB_DOWNLOAD_TIMEOUT=60

MODEL_REPO="ByteDance-Seed/VINCIE-3B"
CACHE_MODEL_DIR="$HF_HUB_CACHE/models--ByteDance-Seed--VINCIE-3B"
CKPT_DIR="/app/ckpt/VINCIE-3B"
mkdir -p "$CKPT_DIR"

# ---------------------- Download: structured cache or direct ----------------------
if [ "${DIRECT_TO_CKPT:-0}" -eq 1 ]; then
  echo "[direct] Baixando ${MODEL_REPO} diretamente para $CKPT_DIR"
  python3 - <<'PY'
import os
from pathlib import Path
from huggingface_hub import snapshot_download

repo_id = "ByteDance-Seed/VINCIE-3B"
ckpt_dir = Path("/app/ckpt/VINCIE-3B")
ckpt_dir.mkdir(parents=True, exist_ok=True)
token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_TOKEN")
snapshot_download(
    repo_id=repo_id,
    local_dir=str(ckpt_dir),
    local_dir_use_symlinks=False,
    resume_download=True,
    token=token,
)
print("[direct] Snapshot materializado em", ckpt_dir)
PY
else
  echo "Verificando snapshot do ${MODEL_REPO} no cache..."
  python3 - <<'PY'
import os
from pathlib import Path
from huggingface_hub import snapshot_download

hf_home = os.environ.get("HF_HOME", "/app/.cache/huggingface")
cache_dir = os.path.join(hf_home, "hub")
os.makedirs(cache_dir, exist_ok=True)
repo_id = "ByteDance-Seed/VINCIE-3B"
token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_TOKEN")
snapshot_download(
    repo_id=repo_id,
    cache_dir=cache_dir,
    resume_download=True,
    local_dir_use_symlinks=False,
    token=token,
)
mcache = Path(cache_dir) / ("models--" + repo_id.replace("/", "--"))
#print("[cache] Estrutura em:", mcache)
#print("[cache] refs:", list((mcache/"refs").glob("*")))
#print("[cache] snapshots:", [p.name for p in (mcache/"snapshots").glob("*") if p.is_dir()])
PY
fi

# Materialize the checkpoint into CKPT_DIR, reusing the hub cache.
# FIX: 'cache_dir' was previously undefined inside this heredoc, so
# snapshot_download always raised NameError, the except clause swallowed it,
# and the checkpoint was never materialized here.
python3 - <<'PY'
import os
from huggingface_hub import snapshot_download

save_dir = '/app/ckpt/VINCIE-3B'
os.makedirs(save_dir, exist_ok=True)
cache_dir = os.path.join(os.environ.get('HF_HOME', '/app/.cache/huggingface'), 'hub')
try:
    print('📥 Baixando VINCIE-3B...')
    snapshot_download(
        repo_id='ByteDance-Seed/VINCIE-3B',
        local_dir=save_dir,
        cache_dir=cache_dir,
        #resume_download=True,
        #local_dir_use_symlinks=False
    )
    print('✅ Modelo ok')
except Exception as e:
    print(f'⚠️ Download falhou: {e}')
PY

# Expose the checkpoint inside the cloned VINCIE repo.
mkdir -p /app/VINCIE/ckpt
ln -sfn /app/ckpt/VINCIE-3B /app/VINCIE/ckpt/VINCIE-3B

#echo "[diag] Cache model dir: $CACHE_MODEL_DIR"
#ls -la "$CACHE_MODEL_DIR" || true
#echo "[diag] refs:"; ls -la "$CACHE_MODEL_DIR/refs" || true
#echo "[diag] snapshots:"; ls -la "$CACHE_MODEL_DIR/snapshots" || true
#echo "[diag] CKPT_DIR: $CKPT_DIR"; ls -la "$CKPT_DIR" || true

# ---------------------- Apex/Q8 builder ----------------------
if nvidia-smi >/dev/null 2>&1; then
  if [ "${DISABLE_BUILDER:-0}" -eq 0 ]; then
    echo "Executando builder Apex/Q8..."
    chmod +x /app/builder.sh || true
    timeout "${BUILDER_TIMEOUT_SEC:-7200}" bash -lc /app/builder.sh || echo "Builder excedeu tempo/erro, prosseguindo."
  else
    echo "Builder desabilitado por DISABLE_BUILDER=1"
  fi
else
  echo "GPU não visível, pulando builder Apex/Q8."
fi

# ---------------------- Diagnostics ----------------------
/app/info.sh || true
#ls -la /app || true
#ls -R /app | head -n 2000 || true

# ---------------------- Start service ----------------------
echo "🚀 Subindo serviços..."
# Tip: export VINCIE_DIRECT_TO_CKPT=1 for the internal fallback
python /app/app_vince.py