EuuIia committed on
Commit
8382c42
·
verified ·
1 Parent(s): 0425d32

Upload ltx_server (5).py

Files changed (1)
  1. api/ltx_server (5).py +790 -0
api/ltx_server (5).py ADDED
@@ -0,0 +1,790 @@
# ltx_server.py — VideoService (beta 1.1)
# Always request output_type="latent"; at the end: VAE (whole block) → pixels → MP4.
# Ignores UserWarning/FutureWarning and attaches the VAE to the manager with the correct dtype/device.

# --- 0. WARNINGS AND ENVIRONMENT ---
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", message=".*")

from huggingface_hub import logging

# Keep only the quietest level; stacking the four verbosity setters would leave the last one (debug) active.
logging.set_verbosity_error()

# Defaults mirrored by the LTXV_DEBUG / LTXV_FRAME_LOG_EVERY environment variables read in VideoService.
LTXV_DEBUG = 1
LTXV_FRAME_LOG_EVERY = 8

# --- 1. IMPORTS ---
import os, subprocess, shlex, tempfile
import torch
import numpy as np
import random
import yaml
from typing import List, Dict
from pathlib import Path
import imageio
from huggingface_hub import hf_hub_download
import sys
import gc
import shutil
import contextlib
import time
import traceback

# Singletons (simple versions)
from managers.vae_manager import vae_manager_singleton
from tools.video_encode_tool import video_encode_tool_singleton

# --- 2. DEPENDENCY MANAGEMENT AND SETUP ---
def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
    try:
        import psutil
        import pynvml as nvml
        nvml.nvmlInit()
        handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
        try:
            procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
        except Exception:
            procs = nvml.nvmlDeviceGetComputeRunningProcesses(handle)
        results = []
        for p in procs:
            pid = int(p.pid)
            used_mb = None
            try:
                if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
                    used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
            except Exception:
                used_mb = None
            name = "unknown"
            user = "unknown"
            try:
                pr = psutil.Process(pid)
                name = pr.name()
                user = pr.username()
            except Exception:
                pass
            results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
        nvml.nvmlShutdown()
        return results
    except Exception:
        return []

def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
    cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
    try:
        out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
    except Exception:
        return []
    results = []
    for line in out.strip().splitlines():
        parts = [p.strip() for p in line.split(",")]
        if len(parts) >= 3:
            try:
                pid = int(parts[0]); name = parts[1]; used_mb = int(parts[2])
                user = "unknown"
                try:
                    import psutil
                    pr = psutil.Process(pid)
                    user = pr.username()
                except Exception:
                    pass
                results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
            except Exception:
                continue
    return results

def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
    if not processes:
        return " - Active processes: (none)\n"
    processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
    lines = [" - Active processes (PID | USER | NAME | VRAM MB):"]
    for p in processes:
        star = "*" if p["pid"] == current_pid else " "
        used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
        lines.append(f"   {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
    return "\n".join(lines) + "\n"

def run_setup():
    setup_script_path = "setup.py"
    if not os.path.exists(setup_script_path):
        print("[DEBUG] 'setup.py' not found. Skipping dependency cloning.")
        return
    try:
        print("[DEBUG] Running setup.py for dependencies...")
        subprocess.run([sys.executable, setup_script_path], check=True)
        print("[DEBUG] Setup finished successfully.")
    except subprocess.CalledProcessError as e:
        print(f"[DEBUG] ERROR in setup.py (code {e.returncode}). Aborting.")
        sys.exit(1)

from api.ltx.inference import (
    create_ltx_video_pipeline,
    create_latent_upsampler,
    load_image_to_tensor_with_resize_and_crop,
    seed_everething,
    calculate_padding,
    load_media_file,
)

DEPS_DIR = Path("/data")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
if not LTX_VIDEO_REPO_DIR.exists():
    print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup...")
    run_setup()

def add_deps_to_path():
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    if repo_path not in sys.path:
        sys.path.insert(0, repo_path)
    print(f"[DEBUG] Repo added to sys.path: {repo_path}")

add_deps_to_path()

# --- 3. MODEL-SPECIFIC IMPORTS ---

from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy

# --- 4. LOGGING HELPERS ---
def log_tensor_info(tensor, name="Tensor"):
    if not isinstance(tensor, torch.Tensor):
        print(f"\n[INFO] '{name}' is not a tensor.")
        return
    print(f"\n--- Tensor: {name} ---")
    print(f" - Shape: {tuple(tensor.shape)}")
    print(f" - Dtype: {tensor.dtype}")
    print(f" - Device: {tensor.device}")
    if tensor.numel() > 0:
        try:
            print(f" - Min: {tensor.min().item():.4f} Max: {tensor.max().item():.4f} Mean: {tensor.mean().item():.4f}")
        except Exception:
            pass
    print("------------------------------------------\n")

# --- 5. MAIN SERVICE CLASS ---
class VideoService:
    def __init__(self):
        t0 = time.perf_counter()
        print("[DEBUG] Initializing VideoService...")
        self.debug = os.getenv("LTXV_DEBUG", "1") == "1"
        self.frame_log_every = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
        self.config = self._load_config()
        print(f"[DEBUG] Config loaded (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"[DEBUG] Selected device: {self.device}")
        self.last_memory_reserved_mb = 0.0
        self._tmp_dirs = set(); self._tmp_files = set(); self._last_outputs = []

        self.pipeline, self.latent_upsampler = self._load_models()
        print(f"[DEBUG] Pipeline and upsampler loaded. Upsampler active? {bool(self.latent_upsampler)}")

        print(f"[DEBUG] Moving models to {self.device}...")
        self.pipeline.to(self.device)
        if self.latent_upsampler:
            self.latent_upsampler.to(self.device)

        self._apply_precision_policy()
        print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")

        # Attach the pipeline/VAE to the manager (prevents vae=None)
        vae_manager_singleton.attach_pipeline(
            self.pipeline,
            device=self.device,
            autocast_dtype=self.runtime_autocast_dtype
        )
        print(f"[DEBUG] VAE manager attached: has_vae={hasattr(self.pipeline, 'vae')} device={self.device}")

        if self.device == "cuda":
            torch.cuda.empty_cache()
        self._log_gpu_memory("After loading models")

        print(f"[DEBUG] VideoService ready. boot_time={time.perf_counter()-t0:.3f}s")

    def _log_gpu_memory(self, stage_name: str):
        if self.device != "cuda":
            return
        device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
        current_reserved_b = torch.cuda.memory_reserved(device_index)
        current_reserved_mb = current_reserved_b / (1024 ** 2)
        total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
        total_memory_mb = total_memory_b / (1024 ** 2)
        peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
        delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
        processes = _query_gpu_processes_via_nvml(device_index) or _query_gpu_processes_via_nvidiasmi(device_index)
        print(f"\n--- [GPU LOG] {stage_name} (cuda:{device_index}) ---")
        print(f" - Reserved: {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB (Δ={delta_mb:+.2f} MB)")
        if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
            print(f" - Peak reserved (this stage): {peak_reserved_mb:.2f} MB")
        print(_gpu_process_table(processes, os.getpid()), end="")
        print("--------------------------------------------------\n")
        self.last_memory_reserved_mb = current_reserved_mb

    def _register_tmp_dir(self, d: str):
        if d and os.path.isdir(d):
            self._tmp_dirs.add(d); print(f"[DEBUG] Registered tmp dir: {d}")

    def _register_tmp_file(self, f: str):
        if f and os.path.exists(f):
            self._tmp_files.add(f); print(f"[DEBUG] Registered tmp file: {f}")

    def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
        print("[DEBUG] Finalize: starting cleanup...")
        keep = set(keep_paths or []); extras = set(extra_paths or [])
        removed_files = 0
        for f in list(self._tmp_files | extras):
            try:
                if f not in keep and os.path.isfile(f):
                    os.remove(f); removed_files += 1; print(f"[DEBUG] Removed tmp file: {f}")
            except Exception as e:
                print(f"[DEBUG] Failed to remove file {f}: {e}")
            finally:
                self._tmp_files.discard(f)
        removed_dirs = 0
        for d in list(self._tmp_dirs):
            try:
                if d not in keep and os.path.isdir(d):
                    shutil.rmtree(d, ignore_errors=True); removed_dirs += 1; print(f"[DEBUG] Removed tmp dir: {d}")
            except Exception as e:
                print(f"[DEBUG] Failed to remove dir {d}: {e}")
            finally:
                self._tmp_dirs.discard(d)
        print(f"[DEBUG] Finalize: files removed={removed_files}, dirs removed={removed_dirs}")
        gc.collect()
        try:
            if clear_gpu and torch.cuda.is_available():
                torch.cuda.empty_cache()
                try:
                    torch.cuda.ipc_collect()
                except Exception:
                    pass
        except Exception as e:
            print(f"[DEBUG] Finalize: GPU cleanup failed: {e}")
        try:
            self._log_gpu_memory("After finalize")
        except Exception as e:
            print(f"[DEBUG] Post-finalize GPU log failed: {e}")

    def _load_config(self):
        base = LTX_VIDEO_REPO_DIR / "configs"
        candidates = [
            base / "ltxv-13b-0.9.8-dev-fp8.yaml",
            base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
            base / "ltxv-13b-0.9.8-distilled.yaml",
        ]
        for cfg in candidates:
            if cfg.exists():
                print(f"[DEBUG] Selected config: {cfg}")
                with open(cfg, "r") as file:
                    return yaml.safe_load(file)
        cfg = base / "ltxv-13b-0.9.8-distilled-fp8.yaml"
        print(f"[DEBUG] Fallback config: {cfg}")
        with open(cfg, "r") as file:
            return yaml.safe_load(file)

    def _load_models(self):
        t0 = time.perf_counter()
        LTX_REPO = "Lightricks/LTX-Video"
        print("[DEBUG] Downloading main checkpoint...")
        distilled_model_path = hf_hub_download(
            repo_id=LTX_REPO,
            filename=self.config["checkpoint_path"],
            local_dir=os.getenv("HF_HOME"),
            cache_dir=os.getenv("HF_HOME_CACHE"),
            token=os.getenv("HF_TOKEN"),
        )
        self.config["checkpoint_path"] = distilled_model_path
        print(f"[DEBUG] Checkpoint at: {distilled_model_path}")

        print("[DEBUG] Downloading spatial upscaler...")
        spatial_upscaler_path = hf_hub_download(
            repo_id=LTX_REPO,
            filename=self.config["spatial_upscaler_model_path"],
            local_dir=os.getenv("HF_HOME"),
            cache_dir=os.getenv("HF_HOME_CACHE"),
            token=os.getenv("HF_TOKEN"),
        )
        self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
        print(f"[DEBUG] Upscaler at: {spatial_upscaler_path}")

        print("[DEBUG] Building pipeline...")
        pipeline = create_ltx_video_pipeline(
            ckpt_path=self.config["checkpoint_path"],
            precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"],
            device="cpu",
            enhance_prompt=False,
            prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
            prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
        )
        print("[DEBUG] Pipeline ready.")

        latent_upsampler = None
        if self.config.get("spatial_upscaler_model_path"):
            print("[DEBUG] Building latent_upsampler...")
            latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
            print("[DEBUG] Upsampler ready.")
        print(f"[DEBUG] _load_models() total time={time.perf_counter()-t0:.3f}s")
        return pipeline, latent_upsampler

    def _promote_fp8_weights_to_bf16(self, module):
        if not isinstance(module, torch.nn.Module):
            print("[DEBUG] FP8→BF16 promotion skipped: target is not an nn.Module.")
            return
        f8 = getattr(torch, "float8_e4m3fn", None)
        if f8 is None:
            print("[DEBUG] torch.float8_e4m3fn unavailable.")
            return
        p_cnt = b_cnt = 0
        for _, p in module.named_parameters(recurse=True):
            try:
                if p.dtype == f8:
                    with torch.no_grad():
                        p.data = p.data.to(torch.bfloat16); p_cnt += 1
            except Exception:
                pass
        for _, b in module.named_buffers(recurse=True):
            try:
                if hasattr(b, "dtype") and b.dtype == f8:
                    b.data = b.data.to(torch.bfloat16); b_cnt += 1
            except Exception:
                pass
        print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")

    def _apply_precision_policy(self):
        prec = str(self.config.get("precision", "")).lower()
        self.runtime_autocast_dtype = torch.float32
        print(f"[DEBUG] Applying precision policy: {prec}")
        if prec == "float8_e4m3fn":
            self.runtime_autocast_dtype = torch.bfloat16
            force_promote = os.getenv("LTXV_FORCE_BF16_ON_FP8", "0") == "1"
            print(f"[DEBUG] FP8 detected. force_promote={force_promote}")
            if force_promote and hasattr(torch, "float8_e4m3fn"):
                try:
                    self._promote_fp8_weights_to_bf16(self.pipeline)
                except Exception as e:
                    print(f"[DEBUG] FP8→BF16 promotion on the pipeline failed: {e}")
                try:
                    if self.latent_upsampler:
                        self._promote_fp8_weights_to_bf16(self.latent_upsampler)
                except Exception as e:
                    print(f"[DEBUG] FP8→BF16 promotion on the upsampler failed: {e}")
        elif prec == "bfloat16":
            self.runtime_autocast_dtype = torch.bfloat16
        elif prec == "mixed_precision":
            self.runtime_autocast_dtype = torch.float16
        else:
            self.runtime_autocast_dtype = torch.float32

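    # Summary of the policy applied above (descriptive only, mirrors the branches of _apply_precision_policy):
    #   precision == "float8_e4m3fn"   -> autocast in torch.bfloat16 (optionally promoting FP8 weights to BF16)
    #   precision == "bfloat16"        -> autocast in torch.bfloat16
    #   precision == "mixed_precision" -> autocast in torch.float16
    #   anything else                  -> torch.float32 (no autocast benefit)
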
    def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
        print(f"[DEBUG] Loading conditioning media: {filepath}")
        tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
        tensor = torch.nn.functional.pad(tensor, padding_values)
        out = tensor.to(self.device, dtype=self.runtime_autocast_dtype) if self.device == "cuda" else tensor.to(self.device)
        print(f"[DEBUG] Conditioning shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
        return out

    def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
        """
        Splits the latent tensor into chunks whose size is given in number of latent frames.

        Args:
            latents_brutos: tensor [B, C, T, H, W]
            num_latente_por_chunk: number of latent frames per chunk
            overlap: number of latent frames shared between consecutive chunks

        Returns:
            List[tensor]: list of cloned chunks
        """
        sum_latent = latents_brutos.shape[2]
        chunks = []

        if num_latente_por_chunk >= sum_latent:
            return [latents_brutos.clone()]

        steps = (sum_latent + num_latente_por_chunk - 1) // num_latente_por_chunk  # ceil
        print("================ CAUSAL PRUNING =================")
        print(f"[DEBUG] TOTAL LATENT FRAMES = {sum_latent}")
        print(f"[DEBUG] Latent frames per chunk = {num_latente_por_chunk}")
        print(f"[DEBUG] Number of chunks = {steps}")

        for i in range(steps):
            start = i * num_latente_por_chunk
            end = start + num_latente_por_chunk
            if i > 0:
                start -= overlap  # overlap with the previous chunk
            if end > sum_latent:
                end = sum_latent
            chunk = latents_brutos[:, :, start:end, :, :].clone()
            chunks.append(chunk)
            print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")

        print("================ CAUSAL PRUNING =================")
        return chunks

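    # Illustrative sketch (hypothetical tensor sizes, not executed by the service): with 14 latent
    # frames, num_latente_por_chunk=6 and overlap=1, the loop above yields the slices
    # [0:6], [5:12] and [11:14], i.e. chunks of 6, 7 and 3 frames:
    #
    #   dummy = torch.zeros(1, 128, 14, 16, 16)
    #   chunks = video_generation_service._dividir_latentes_por_tamanho(dummy, num_latente_por_chunk=6, overlap=1)
    #   [c.shape[2] for c in chunks]   # -> [6, 7, 3]
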
    def _gerar_lista_com_transicoes(self, pasta: str, video_paths: List[str], crossfade_frames: int = 8, fps: float = 24.0) -> List[str]:
        """
        Builds a new list of videos with cuts and crossfade transitions.
        Each transition lasts 'crossfade_frames' frames (converted to seconds at 'fps').

        Args:
            pasta: working directory
            video_paths: list of original video paths
            crossfade_frames: number of frames used for each transition
            fps: frame rate of the clips (the service encodes at 24 fps)
        Returns:
            List[str]: new list of video paths, transitions included
        """
        def _duration(path: str) -> float:
            # Clip duration in seconds via ffprobe; needed because ffmpeg's trim filter works in seconds
            out = subprocess.check_output(
                ["ffprobe", "-v", "error", "-show_entries", "format=duration",
                 "-of", "default=noprint_wrappers=1:nokey=1", path],
                text=True,
            )
            return float(out.strip())

        fade_s = crossfade_frames / fps
        nova_lista = []

        for i in range(len(video_paths)):
            video_atual = video_paths[i]
            video_proximo = video_paths[i + 1] if i + 1 < len(video_paths) else None
            dur = _duration(video_atual)

            # ---- 1. Trim the current video ----
            if i == 0:
                podado = os.path.join(pasta, f"podado_fim_{i+1}.mp4")
                cmd_trim = f'ffmpeg -y -i "{video_atual}" -vf "trim=0:{dur - fade_s},setpts=PTS-STARTPTS" "{podado}"'
            elif video_proximo:
                podado = os.path.join(pasta, f"podado_inicio_fim_{i+1}.mp4")
                cmd_trim = f'ffmpeg -y -i "{video_atual}" -vf "trim={fade_s}:{dur - fade_s},setpts=PTS-STARTPTS" "{podado}"'
            else:
                podado = os.path.join(pasta, f"podado_inicio_{i+1}.mp4")
                cmd_trim = f'ffmpeg -y -i "{video_atual}" -vf "trim=start={fade_s},setpts=PTS-STARTPTS" "{podado}"'

            subprocess.run(cmd_trim, shell=True, check=True)
            nova_lista.append(podado)

            # ---- 2. Build the transition when there is a next video ----
            if video_proximo:
                temp_fim = os.path.join(pasta, f"temp_fim_{i+1}.mp4")
                cmd_fim = f'ffmpeg -y -i "{video_atual}" -vf "trim=start={dur - fade_s},setpts=PTS-STARTPTS" "{temp_fim}"'
                subprocess.run(cmd_fim, shell=True, check=True)

                temp_inicio = os.path.join(pasta, f"temp_inicio_{i+2}.mp4")
                cmd_inicio = f'ffmpeg -y -i "{video_proximo}" -vf "trim=0:{fade_s},setpts=PTS-STARTPTS" "{temp_inicio}"'
                subprocess.run(cmd_inicio, shell=True, check=True)

                transicao = os.path.join(pasta, f"transicao_{i+1}_{i+2}.mp4")
                cmd_blend = (
                    f'ffmpeg -y -i "{temp_fim}" -i "{temp_inicio}" -filter_complex '
                    f'"[0:v][1:v]blend=all_expr=\'A*(1-T/{fade_s})+B*(T/{fade_s})\'[v]" '
                    f'-map "[v]" -c:v libx264 -pix_fmt yuv420p "{transicao}"'
                )
                subprocess.run(cmd_blend, shell=True, check=True)
                nova_lista.append(transicao)

        return nova_lista

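    # For three input parts, the list returned above is, in order (file names as produced by the code):
    #   podado_fim_1, transicao_1_2, podado_inicio_fim_2, transicao_2_3, podado_inicio_3
    # which generate() then hands to _concat_mp4s_no_reencode().
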
    def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
        """
        Concatenates multiple MP4s without re-encoding, using ffmpeg's concat demuxer.
        NOTE: all files must share the same codec, fps, resolution, etc.
        """
        if not mp4_list or len(mp4_list) < 2:
            raise ValueError("Provide at least two MP4 files to concatenate.")

        # Temporary list file consumed by the concat demuxer
        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
            for mp4 in mp4_list:
                f.write(f"file '{os.path.abspath(mp4)}'\n")
            list_path = f.name

        cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
        print(f"[DEBUG] Concat: {cmd}")

        try:
            subprocess.check_call(shlex.split(cmd))
        finally:
            try:
                os.remove(list_path)
            except Exception:
                pass

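    # Illustrative example (paths as produced by generate()): for
    #   mp4_list = ["/app/output/output_par_1.mp4", "/app/output/output_par_2.mp4"]
    # the temporary list file contains
    #   file '/app/output/output_par_1.mp4'
    #   file '/app/output/output_par_2.mp4'
    # and the command executed is
    #   ffmpeg -y -f concat -safe 0 -i <list.txt> -c copy <out_path>
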
    def generate(
        self,
        prompt,
        negative_prompt,
        mode="text-to-video",
        start_image_filepath=None,
        middle_image_filepath=None,
        middle_frame_number=None,
        middle_image_weight=1.0,
        end_image_filepath=None,
        end_image_weight=1.0,
        input_video_filepath=None,
        height=512,
        width=704,
        duration=2.0,
        frames_to_use=9,
        seed=42,
        randomize_seed=True,
        guidance_scale=3.0,
        improve_texture=True,
        progress_callback=None,
        # Always latent → VAE → MP4 (simple path)
        external_decode=True,
    ):
        t_all = time.perf_counter()
        print(f"[DEBUG] generate() begin mode={mode} external_decode={external_decode} improve_texture={improve_texture}")
        if self.device == "cuda":
            torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
        self._log_gpu_memory("Generation start")

        if mode == "image-to-video" and not start_image_filepath:
            raise ValueError("A start image is required for image-to-video mode")
        if mode == "video-to-video" and not input_video_filepath:
            raise ValueError("An input video is required for video-to-video mode")

        used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
        seed_everething(used_seed); print(f"[DEBUG] Seed used: {used_seed}")

        FPS = 24.0; MAX_NUM_FRAMES = 2570
        target_frames_rounded = round(duration * FPS)
        n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
        actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
        print(f"[DEBUG] Target frames: {actual_num_frames} (dur={duration}s @ {FPS}fps)")

        height_padded = ((height - 1) // 32 + 1) * 32
        width_padded = ((width - 1) // 32 + 1) * 32
        padding_values = calculate_padding(height, width, height_padded, width_padded)
        print(f"[DEBUG] Dimensions: ({height},{width}) -> padded ({height_padded},{width_padded}); padding={padding_values}")

        generator = torch.Generator(device=self.device).manual_seed(used_seed)
        conditioning_items = []

        if mode == "image-to-video":
            start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
            conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
            if middle_image_filepath and middle_frame_number is not None:
                middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
                safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
                conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
            if end_image_filepath:
                end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
                last_frame_index = actual_num_frames - 1
                conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
        print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")

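        # Example of the frame arithmetic above: duration=2.0 at 24 fps targets 48 frames, which is
        # snapped to the nearest 8*n+1 value -> 49 frames; 512x704 is already a multiple of 32,
        # so no padding is added in that case.
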
        # We always request latents (simple path)
        call_kwargs = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "height": height_padded,
            "width": width_padded,
            "num_frames": actual_num_frames,
            "frame_rate": int(FPS),
            "generator": generator,
            "output_type": "latent",
            "conditioning_items": conditioning_items if conditioning_items else None,
            "media_items": None,
            "decode_timestep": self.config["decode_timestep"],
            "decode_noise_scale": self.config["decode_noise_scale"],
            "stochastic_sampling": self.config["stochastic_sampling"],
            "image_cond_noise_scale": 0.01,
            "is_video": True,
            "vae_per_channel_normalize": True,
            "mixed_precision": (self.config["precision"] == "mixed_precision"),
            "offload_to_cpu": False,
            "enhance_prompt": False,
            "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
        }
        print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")

        if mode == "video-to-video":
            media = load_media_file(
                media_path=input_video_filepath,
                height=height,
                width=width,
                max_frames=int(frames_to_use),
                padding=padding_values,
            ).to(self.device)
            call_kwargs["media_items"] = media
            print(f"[DEBUG] media_items shape={tuple(media.shape)}")

        latents = None
        multi_scale_pipeline = None

        try:
            if improve_texture:
                if not self.latent_upsampler:
                    raise ValueError("Spatial upscaler not loaded.")
                print("[DEBUG] Multi-scale: building pipeline...")
                multi_scale_pipeline = LTXMultiScalePipeline(self.pipeline, self.latent_upsampler)
                first_pass_args = self.config.get("first_pass", {}).copy()
                first_pass_args["guidance_scale"] = float(guidance_scale)
                second_pass_args = self.config.get("second_pass", {}).copy()
                second_pass_args["guidance_scale"] = float(guidance_scale)

                multi_scale_call_kwargs = call_kwargs.copy()
                multi_scale_call_kwargs.update(
                    {
                        "downscale_factor": self.config["downscale_factor"],
                        "first_pass": first_pass_args,
                        "second_pass": second_pass_args,
                    }
                )
                print("[DEBUG] Calling multi_scale_pipeline...")
                t_ms = time.perf_counter()
                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                with ctx:
                    result = multi_scale_pipeline(**multi_scale_call_kwargs)
                print(f"[DEBUG] multi_scale_pipeline time={time.perf_counter()-t_ms:.3f}s")

                if hasattr(result, "latents"):
                    latents = result.latents
                elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
                    latents = result.images
                else:
                    latents = result
                print(f"[DEBUG] Latents (multi-scale): shape={tuple(latents.shape)}")
            else:
                single_pass_kwargs = call_kwargs.copy()
                first_pass_config = self.config.get("first_pass", {})
                single_pass_kwargs.update(
                    {
                        "guidance_scale": float(guidance_scale),
                        "stg_scale": first_pass_config.get("stg_scale"),
                        "rescaling_scale": first_pass_config.get("rescaling_scale"),
                        "skip_block_list": first_pass_config.get("skip_block_list"),
                    }
                )
                schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
                if mode == "video-to-video":
                    schedule = [0.7]; print("[INFO] video-to-video mode (single pass): timesteps=[0.7]")
                if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
                    single_pass_kwargs["timesteps"] = schedule
                    single_pass_kwargs["guidance_timesteps"] = schedule
                print(f"[DEBUG] Single-pass: timesteps_len={len(schedule) if schedule else 0}")

                print("\n[INFO] Running single-pass pipeline...")
                t_sp = time.perf_counter()
                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                with ctx:
                    result = self.pipeline(**single_pass_kwargs)
                print(f"[DEBUG] single-pass time={time.perf_counter()-t_sp:.3f}s")

                if hasattr(result, "latents"):
                    latents = result.latents
                elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
                    latents = result.images
                else:
                    latents = result
                print(f"[DEBUG] Latents (single-pass): shape={tuple(latents.shape)}")

            # Staging and MP4 writing (simple path: VAE → pixels → MP4)

            latents_cpu = latents.detach().to("cpu", non_blocking=True)
            if self.device == "cuda":
                torch.cuda.empty_cache()
                try:
                    torch.cuda.ipc_collect()
                except Exception:
                    pass

            # Split the latent block into chunks (6 latent frames each, 1 frame of overlap)
            # lat_a, lat_b = self._dividir_latentes(latents_cpu)
            # lat_a1, lat_a2 = self._dividir_latentes(lat_a)
            # lat_b1, lat_b2 = self._dividir_latentes(lat_b)
            # latents_parts = [lat_a1, lat_a2, lat_b1, lat_b2]
            latents_parts = self._dividir_latentes_por_tamanho(latents_cpu, 6, 1)

            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)

            partes_mp4 = []
            par = 0

            for latents_chunk in latents_parts:
                par += 1
                print(f"[DEBUG] Partition {par}: {tuple(latents_chunk.shape)}")

                output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
                final_output_path = None

                print("[DEBUG] Decoding latent block with the VAE → pixel tensor...")
                # Use the manager with a per-item timestep; avoids target_shape issues and the NoneType.decode path
                pixel_tensor = vae_manager_singleton.decode(
                    latents_chunk.to(self.device, non_blocking=True),
                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
                )
                log_tensor_info(pixel_tensor, "Pixel tensor (VAE output)")

                print("[DEBUG] Encoding MP4 from the pixel tensor (whole block)...")
                video_encode_tool_singleton.save_video_from_tensor(
                    pixel_tensor,
                    output_video_path,
                    fps=call_kwargs["frame_rate"],
                    progress_callback=progress_callback
                )

                candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
                try:
                    shutil.move(output_video_path, candidate)
                    final_output_path = candidate
                    print(f"[DEBUG] MP4 part {par} moved to {final_output_path}")
                except Exception as e:
                    final_output_path = output_video_path
                    print(f"[DEBUG] Move failed; keeping tmp file as final: {e}")
                partes_mp4.append(final_output_path)

            # Add crossfade transitions between the parts, then concatenate into the final MP4
            final_concat = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
            partes_com_transicoes = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
            self._concat_mp4s_no_reencode(partes_com_transicoes, final_concat)

            self._log_gpu_memory("Generation end")
            return final_concat, used_seed

        except Exception as e:
            print("[DEBUG] EXCEPTION DURING GENERATION:")
            print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
            raise
        finally:
            try:
                del latents
            except Exception:
                pass
            try:
                del multi_scale_pipeline
            except Exception:
                pass

            gc.collect()
            try:
                if self.device == "cuda":
                    torch.cuda.empty_cache()
                    try:
                        torch.cuda.ipc_collect()
                    except Exception:
                        pass
            except Exception as e:
                print(f"[DEBUG] GPU cleanup in finally failed: {e}")

            try:
                self.finalize(keep_paths=[])
            except Exception as e:
                print(f"[DEBUG] finalize() in finally failed: {e}")

print("Creating VideoService instance. Model loading will start now...")
video_generation_service = VideoService()
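
# Illustrative usage sketch (hypothetical arguments; assumes the models load on this machine):
#
#   video_path, seed = video_generation_service.generate(
#       prompt="a ship sailing through a storm at dusk",
#       negative_prompt="blurry, low quality",
#       mode="text-to-video",
#       duration=2.0,           # seconds; snapped internally to 8*n+1 frames at 24 fps
#       height=512, width=704,  # padded up to multiples of 32 when needed
#       improve_texture=True,   # use the multi-scale pipeline + latent upsampler
#   )
#   print(video_path, seed)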