rahul7star committed
Commit 9d1bc12 · verified · 1 Parent(s): 5d525ab

Update app_quant_latent.py

Files changed (1):
  1. app_quant_latent.py +265 -190
app_quant_latent.py CHANGED
@@ -5,60 +5,81 @@ import sys
  import platform
  import diffusers
  import transformers
  import os
- import torchvision.transforms as T

  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
  from diffusers import ZImagePipeline, AutoModel
  from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

  # ============================================================
-
  # LOGGING BUFFER
-
  # ============================================================
-
  LOGS = ""
  def log(msg):
-     global LOGS
-     print(msg)
-     LOGS += msg + "\n"
-     return msg

  # ============================================================

- # ENVIRONMENT INFO

- # ============================================================

  log("===================================================")
- log("🔍 Z-IMAGE-TURBO DEBUGGING + ROBUST TRANSFORMER INSPECTION")
  log("===================================================\n")

- log(f"📌 PYTHON VERSION : {sys.version.replace(chr(10), ' ')}")
  log(f"📌 PLATFORM : {platform.platform()}")
- log(f"📌 TORCH VERSION : {torch.__version__}")
- log(f"📌 TRANSFORMERS VERSION : {transformers.__version__}")
- log(f"📌 DIFFUSERS VERSION : {diffusers.__version__}")
  log(f"📌 CUDA AVAILABLE : {torch.cuda.is_available()}")

- if torch.cuda.is_available():
-     log(f"📌 GPU NAME : {torch.cuda.get_device_name(0)}")
-     log(f"📌 GPU CAPABILITY : {torch.cuda.get_device_capability(0)}")
-     log(f"📌 GPU MEMORY (TOTAL) : {torch.cuda.get_device_properties(0).total_memory/1e9:.2f} GB")
-     log(f"📌 FLASH ATTENTION : {torch.backends.cuda.flash_sdp_enabled()}")
- else:
-     raise RuntimeError("❌ CUDA is REQUIRED but not available.")

  device = "cuda"
  gpu_id = 0

  # ============================================================
-
  # MODEL SETTINGS
-
  # ============================================================
-
  model_cache = "./weights/"
  model_id = "Tongyi-MAI/Z-Image-Turbo"
  torch_dtype = torch.bfloat16
@@ -72,232 +93,286 @@ log(f"Model Cache Directory : {model_cache}")
  log(f"torch_dtype : {torch_dtype}")
  log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")

- # ============================================================

- # ROBUST TRANSFORMER INSPECTION FUNCTION

  # ============================================================

- def inspect_transformer(model, model_name="Transformer"):
-     log(f"\n🔍 {model_name} Architecture Details:")
-     try:
-         block_attrs = ["transformer_blocks", "blocks", "layers", "encoder_blocks", "model"]
-         blocks = None
-         for attr in block_attrs:
-             blocks = getattr(model, attr, None)
-             if blocks is not None:
-                 break
-
-         if blocks is None:
-             log(f"⚠️ Could not find transformer blocks in {model_name}, skipping detailed block info")
-         else:
-             try:
-                 log(f"Number of Transformer Modules : {len(blocks)}")
-                 for i, block in enumerate(blocks):
-                     log(f" Block {i}: {block.__class__.__name__}")
-                     attn_type = getattr(block, "attn", None)
-                     if attn_type:
-                         log(f" Attention: {attn_type.__class__.__name__}")
-                         flash_enabled = getattr(attn_type, "flash", None)
-                         log(f" FlashAttention Enabled? : {flash_enabled}")
-             except Exception as e:
-                 log(f"⚠️ Error inspecting blocks: {e}")
-
-         config = getattr(model, "config", None)
-         if config:
-             log(f"Hidden size: {getattr(config, 'hidden_size', 'N/A')}")
-             log(f"Number of attention heads: {getattr(config, 'num_attention_heads', 'N/A')}")
-             log(f"Number of layers: {getattr(config, 'num_hidden_layers', 'N/A')}")
-             log(f"Intermediate size: {getattr(config, 'intermediate_size', 'N/A')}")
-         else:
-             log(f"⚠️ No config attribute found in {model_name}")
-     except Exception as e:
-         log(f"⚠️ Failed to inspect {model_name}: {e}")

  # ============================================================

- # LOAD TRANSFORMER BLOCK

- # ============================================================

  log("\n===================================================")
  log("🔧 LOADING TRANSFORMER BLOCK")
  log("===================================================")

- quantization_config = DiffusersBitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch_dtype,
-     bnb_4bit_use_double_quant=True,
-     llm_int8_skip_modules=["transformer_blocks.0.img_mod"],
- )
- log("4-bit Quantization Config (Transformer):")
- log(str(quantization_config))
-
- transformer = AutoModel.from_pretrained(
-     model_id,
-     cache_dir=model_cache,
-     subfolder="transformer",
-     quantization_config=quantization_config,
-     torch_dtype=torch_dtype,
-     device_map=device,
- )
- log("✅ Transformer block loaded successfully.")
- inspect_transformer(transformer, "Transformer")

- if USE_CPU_OFFLOAD:
-     transformer = transformer.to("cpu")

- # ============================================================

- # LOAD TEXT ENCODER

- # ============================================================

  log("\n===================================================")
  log("🔧 LOADING TEXT ENCODER")
  log("===================================================")

- quantization_config = TransformersBitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch_dtype,
-     bnb_4bit_use_double_quant=True,
- )
- log("4-bit Quantization Config (Text Encoder):")
- log(str(quantization_config))
-
- text_encoder = AutoModel.from_pretrained(
-     model_id,
-     cache_dir=model_cache,
-     subfolder="text_encoder",
-     quantization_config=quantization_config,
-     torch_dtype=torch_dtype,
-     device_map=device,
- )
- log("✅ Text encoder loaded successfully.")
- inspect_transformer(text_encoder, "Text Encoder")

- if USE_CPU_OFFLOAD:
-     text_encoder = text_encoder.to("cpu")

- # ============================================================

- # BUILD PIPELINE

- # ============================================================

  log("\n===================================================")
- log("🔧 BUILDING Z-IMAGE-TURBO PIPELINE")
  log("===================================================")

- pipe = ZImagePipeline.from_pretrained(
-     model_id,
-     transformer=transformer,
-     text_encoder=text_encoder,
-     torch_dtype=torch_dtype,
- )

- if USE_CPU_OFFLOAD:
-     pipe.enable_model_cpu_offload(gpu_id=gpu_id)
-     log("⚙ CPU OFFLOAD ENABLED")
- else:
-     pipe.to(device)
-     log("⚙ Pipeline moved to GPU")

- log("✅ Pipeline ready.")

- # ============================================================

- # FUNCTION TO CONVERT LATENTS TO IMAGE

  # ============================================================

- def latent_to_image(latent):
      try:
-         img_tensor = pipe.vae.decode(latent)
-         img_tensor = (img_tensor / 2 + 0.5).clamp(0, 1)
-         pil_img = T.ToPILImage()(img_tensor[0])
-         return pil_img
-     except Exception as e:
-         log(f"⚠️ Failed to decode latent: {e}")
-         return None

- # ============================================================

- # REAL-TIME INFERENCE FUNCTION

- # ============================================================

  @spaces.GPU
- def generate_image_realtime(prompt, height, width, steps, seed):
      global LOGS
-     LOGS = ""
      log("===================================================")
-     log("🎨 RUNNING REAL-TIME INFERENCE")
      log("===================================================")
-     log(f"Prompt : {prompt}")
-     log(f"Resolution : {width} x {height}")
-     log(f"Steps : {steps}")
-     log(f"Seed : {seed}")
-
-     generator = torch.Generator(device).manual_seed(seed)
-     latent_history = []
-
-     # Define callback to save latents and GPU info
-     def save_latents(step, timestep, latents):
-         latent_history.append(latents.detach().clone())
-         gpu_mem = torch.cuda.memory_allocated(0)/1e9
-         log(f"Step {step} - GPU Memory Used: {gpu_mem:.2f} GB")
-
-     # Yield images step-by-step
-     for step, img in pipe(
-         prompt=prompt,
-         height=height,
-         width=width,
-         num_inference_steps=steps,
-         guidance_scale=0.0,
-         generator=generator,
-         callback=save_latents,
-         callback_steps=1
-     ).iter():
-         current_latent = latent_history[-1] if latent_history else None
-         latent_images = [latent_to_image(l) for l in latent_history if l is not None]
-         yield img, latent_images, LOGS

- # ============================================================

- # GRADIO UI

  # ============================================================

  with gr.Blocks(title="Z-Image-Turbo Generator") as demo:
-     gr.Markdown("# **🚀 Z-Image-Turbo — Real-Time Latent & Transformer Logs**")

      with gr.Row():
          with gr.Column(scale=1):
              prompt = gr.Textbox(label="Prompt", value="Realistic mid-aged male image")
              height = gr.Slider(256, 2048, value=1024, step=8, label="Height")
              width = gr.Slider(256, 2048, value=1024, step=8, label="Width")
-             steps = gr.Slider(1, 16, value=9, step=1, label="Inference Steps")
-             seed = gr.Slider(0, 999999, value=42, step=1, label="Seed")
-             btn = gr.Button("Generate", variant="primary")

          with gr.Column(scale=1):
-             output_image = gr.Image(label="Final Output Image")
-             latent_gallery = gr.Gallery(label="Latent Evolution", elem_id="latent_gallery").style(grid=[2], height="auto")
-             logs_panel = gr.Textbox(label="📜 Transformer & GPU Logs", lines=25, interactive=False)

-     btn.click(
-         generate_image_realtime,
          inputs=[prompt, height, width, steps, seed],
-         outputs=[output_image, latent_gallery, logs_panel],
      )

- demo.launch()

@@ -5,60 +5,81 @@ import sys
  import platform
  import diffusers
  import transformers
+ import psutil
  import os
+ import time
+ import torchvision.transforms as T  # restored: still needed by latent_to_image() below

  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
  from diffusers import ZImagePipeline, AutoModel
  from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

  # ============================================================
  # LOGGING BUFFER
  # ============================================================
  LOGS = ""
  def log(msg):
+     global LOGS
+     print(msg)
+     LOGS += msg + "\n"
+     return msg

+
+ # ============================================================
+ # SYSTEM METRICS — LIVE GPU + CPU MONITORING
+ # ============================================================
+ def log_system_stats(tag=""):
+     try:
+         log(f"\n===== 🔥 SYSTEM STATS {tag} =====")

+         # ============= GPU STATS =============
+         if torch.cuda.is_available():
+             allocated = torch.cuda.memory_allocated(0) / 1e9
+             reserved = torch.cuda.memory_reserved(0) / 1e9
+             total = torch.cuda.get_device_properties(0).total_memory / 1e9
+             free = total - allocated
+
+             log(f"💠 GPU Total : {total:.2f} GB")
+             log(f"💠 GPU Allocated : {allocated:.2f} GB")
+             log(f"💠 GPU Reserved : {reserved:.2f} GB")
+             log(f"💠 GPU Free : {free:.2f} GB")
+
+         # ============= CPU STATS =============
+         cpu = psutil.cpu_percent()
+         ram_used = psutil.virtual_memory().used / 1e9
+         ram_total = psutil.virtual_memory().total / 1e9
+
+         log(f"🧠 CPU Usage : {cpu}%")
+         log(f"🧠 RAM Used : {ram_used:.2f} GB / {ram_total:.2f} GB")
+
+     except Exception as e:
+         log(f"⚠️ Failed to log system stats: {e}")

+ # ============================================================
+ # ENVIRONMENT INFO
+ # ============================================================
  log("===================================================")
+ log("🔍 Z-IMAGE-TURBO DEBUGGING + LIVE METRIC LOGGER")
  log("===================================================\n")

+ log(f"📌 PYTHON VERSION : {sys.version.replace(chr(10),' ')}")
  log(f"📌 PLATFORM : {platform.platform()}")
+ log(f"📌 TORCH VERSION : {torch.__version__}")
+ log(f"📌 TRANSFORMERS VERSION : {transformers.__version__}")
+ log(f"📌 DIFFUSERS VERSION : {diffusers.__version__}")
  log(f"📌 CUDA AVAILABLE : {torch.cuda.is_available()}")

+ log_system_stats("AT STARTUP")
+
+ if not torch.cuda.is_available():
+     raise RuntimeError("❌ CUDA Required")

  device = "cuda"
  gpu_id = 0

  # ============================================================
  # MODEL SETTINGS
  # ============================================================
  model_cache = "./weights/"
  model_id = "Tongyi-MAI/Z-Image-Turbo"
  torch_dtype = torch.bfloat16

@@ -72,232 +93,286 @@ log(f"Model Cache Directory : {model_cache}")
  log(f"torch_dtype : {torch_dtype}")
  log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")

+ log_system_stats("BEFORE TRANSFORMER LOAD")

  # ============================================================
+ # FUNCTION TO CONVERT LATENTS TO IMAGE
+ # ============================================================
+ def latent_to_image(latent):
+     try:
+         # diffusers VAE decode() returns a DecoderOutput; the tensor is .sample
+         img_tensor = pipe.vae.decode(latent).sample
+         img_tensor = (img_tensor / 2 + 0.5).clamp(0, 1)
+         pil_img = T.ToPILImage()(img_tensor[0])
+         return pil_img
+     except Exception as e:
+         log(f"⚠️ Failed to decode latent: {e}")
+         return None
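
`latent_to_image()` decodes latents as-is. In stock diffusers, VAEs typically expect latents divided by `vae.config.scaling_factor` before `decode()`, and `decode()` returns a `DecoderOutput` whose tensor lives in `.sample`. A hedged sketch of that fuller path, assuming a standard diffusers autoencoder; whether Z-Image-Turbo's VAE follows the same convention is an assumption:

```python
# Sketch: decode one latent batch to a PIL image with explicit scaling.
import torch
import torchvision.transforms as T

def decode_latent(vae, latent: torch.Tensor):
    # undo the scaling applied when the latents were produced, if configured
    scale = getattr(vae.config, "scaling_factor", 1.0)
    with torch.no_grad():
        image = vae.decode(latent / scale).sample  # DecoderOutput.sample
    image = (image / 2 + 0.5).clamp(0, 1)          # [-1, 1] -> [0, 1]
    return T.ToPILImage()(image[0].float().cpu())  # first item in the batch
```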
 
+
+ # ============================================================
+ # SAFE TRANSFORMER INSPECTION
+ # ============================================================
+ def inspect_transformer(model, name):
+     log(f"\n🔍 Inspecting {name}")
+     try:
+         candidates = ["transformer_blocks", "blocks", "layers", "encoder", "model"]
+         blocks = None

+         for attr in candidates:
+             if hasattr(model, attr):
+                 blocks = getattr(model, attr)
+                 break
+
+         if blocks is None:
+             log(f"⚠️ No block structure found in {name}")
+             return
+
+         if hasattr(blocks, "__len__"):
+             log(f"Total Blocks = {len(blocks)}")
+         else:
+             log("⚠️ Blocks exist but are not iterable")
+
+         for i in range(min(10, len(blocks) if hasattr(blocks, "__len__") else 0)):
+             log(f"Block {i} = {blocks[i].__class__.__name__}")
+
+     except Exception as e:
+         log(f"⚠️ Transformer inspect error: {e}")

+ # ============================================================
+ # LOAD TRANSFORMER — WITH LIVE STATS
+ # ============================================================
  log("\n===================================================")
  log("🔧 LOADING TRANSFORMER BLOCK")
  log("===================================================")

+ log("📌 Logging memory before load:")
+ log_system_stats("START TRANSFORMER LOAD")
+
+ try:
+     quant_cfg = DiffusersBitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_compute_dtype=torch_dtype,
+         bnb_4bit_use_double_quant=True,
+     )
+
+     transformer = AutoModel.from_pretrained(
+         model_id,
+         cache_dir=model_cache,
+         subfolder="transformer",
+         quantization_config=quant_cfg,
+         torch_dtype=torch_dtype,
+         device_map=device,
+     )
+     log("✅ Transformer loaded successfully.")

+ except Exception as e:
+     log(f"❌ Transformer load failed: {e}")
+     transformer = None

+ log_system_stats("AFTER TRANSFORMER LOAD")

+ if transformer:
+     inspect_transformer(transformer, "Transformer")
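
For a sense of what the NF4 config buys before the numbers show up in the logs: 4-bit weights need roughly a quarter of the memory of bf16 weights, and `bnb_4bit_use_double_quant=True` additionally quantizes the per-block scales (about 0.37 bits per parameter saved, per the QLoRA paper). A back-of-envelope sketch; the parameter count is a placeholder, not Z-Image-Turbo's real size:

```python
# Rough weight-memory estimate: bf16 vs 4-bit NF4.
n_params = 6e9                 # hypothetical parameter count, not the real model
bf16_gb = n_params * 2 / 1e9   # 2 bytes per weight
nf4_gb = n_params * 0.5 / 1e9  # 4 bits per weight (excludes scales and buffers)
print(f"bf16 ~{bf16_gb:.1f} GB vs nf4 ~{nf4_gb:.1f} GB")
```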

+ # ============================================================
+ # LOAD TEXT ENCODER
+ # ============================================================
  log("\n===================================================")
  log("🔧 LOADING TEXT ENCODER")
  log("===================================================")

+ log_system_stats("START TEXT ENCODER LOAD")

+ try:
+     quant_cfg2 = TransformersBitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_compute_dtype=torch_dtype,
+         bnb_4bit_use_double_quant=True,
+     )
+
+     text_encoder = AutoModel.from_pretrained(
+         model_id,
+         cache_dir=model_cache,
+         subfolder="text_encoder",
+         quantization_config=quant_cfg2,
+         torch_dtype=torch_dtype,
+         device_map=device,
+     )
+     log("✅ Text encoder loaded successfully.")

+ except Exception as e:
+     log(f"❌ Text encoder load failed: {e}")
+     text_encoder = None

+ log_system_stats("AFTER TEXT ENCODER LOAD")

+ if text_encoder:
+     inspect_transformer(text_encoder, "Text Encoder")

+
+ # ============================================================
+ # BUILD PIPELINE
+ # ============================================================
  log("\n===================================================")
+ log("🔧 BUILDING PIPELINE")
  log("===================================================")

+ log_system_stats("START PIPELINE BUILD")

+ try:
+     pipe = ZImagePipeline.from_pretrained(
+         model_id,
+         transformer=transformer,
+         text_encoder=text_encoder,
+         torch_dtype=torch_dtype,
+     )
+     pipe.to(device)
+     log("✅ Pipeline built successfully.")

+ except Exception as e:
+     log(f"❌ Pipeline build failed: {e}")
+     pipe = None

+ log_system_stats("AFTER PIPELINE BUILD")

  # ============================================================
+ # INFERENCE
+ # ============================================================
+ @spaces.GPU
+ def generate_image(prompt, height, width, steps, seed):
+     global LOGS
+     LOGS = ""  # reset logs
+
+     log("===================================================")
+     log("🎨 RUNNING INFERENCE")
+     log("===================================================")
+     log_system_stats("BEFORE INFERENCE")

      try:
+         generator = torch.Generator(device).manual_seed(int(seed))
+         latent_history = []
+
+         # Callback to save latents and GPU info
+         def save_latents(step, timestep, latents):
+             latent_history.append(latents.detach().clone())
+             gpu_mem = torch.cuda.memory_allocated(0)/1e9
+             log(f"Step {step} - GPU Memory Used: {gpu_mem:.2f} GB")
+
+         # Loop over the pipeline for step-wise generation
+         for step, img in pipe(
+             prompt=prompt,
+             height=height,
+             width=width,
+             num_inference_steps=steps,
+             guidance_scale=0.0,
+             generator=generator,
+             callback=save_latents,
+             callback_steps=1
+         ).iter():
+             # Optionally: yield intermediate images or just store latents
+             current_latent = latent_history[-1] if latent_history else None
+             # You can process current_latent here if needed
+
+         log("✅ Inference finished.")
+         log_system_stats("AFTER INFERENCE")
+
+         # Return final image + logs
+         return img, LOGS

+     except Exception as e:
+         log(f"❌ Inference error: {e}")
+         return None, LOGS

+ # NOTE: this second definition shadows generate_image above; it is the one
+ # the UI calls, and it returns (final image, latent gallery, logs).
  @spaces.GPU
+ def generate_image(prompt, height, width, steps, seed):
      global LOGS
+     LOGS = ""  # reset logs
+
+     log("===================================================")
+     log("🎨 RUNNING INFERENCE")
+     log("===================================================")
+     log_system_stats("BEFORE INFERENCE")

+     try:
+         generator = torch.Generator(device).manual_seed(int(seed))  # gr.Number yields a float
+         latent_history = []
+
+         # Callback to save latents and GPU info
+         def save_latents(step, timestep, latents):
+             latent_history.append(latents.detach().clone())
+             gpu_mem = torch.cuda.memory_allocated(0)/1e9
+             log(f"Step {step} - GPU Memory Used: {gpu_mem:.2f} GB")
+
+         # Step-wise loop just for latent capture
+         for step, _ in pipe(
+             prompt=prompt,
+             height=height,
+             width=width,
+             num_inference_steps=steps,
+             guidance_scale=0.0,
+             generator=generator,
+             callback=save_latents,
+             callback_steps=1
+         ).iter():
+             pass  # only capturing latents, ignoring intermediate images
+
+         # Original final image generation
+         output = pipe(
+             prompt=prompt,
+             height=height,
+             width=width,
+             num_inference_steps=steps,
+             guidance_scale=0.0,
+             generator=generator,
+         )
+
+         log("✅ Inference finished.")
+         log_system_stats("AFTER INFERENCE")
+
+         # Decode captured latents so the Gallery receives images, not raw tensors
+         latent_images = [latent_to_image(l) for l in latent_history if l is not None]
+         return output.images[0], latent_images, LOGS
+
+     except Exception as e:
+         log(f"❌ Inference error: {e}")
+         return None, None, LOGS

+ # ============================================================
+ # UI
+ # ============================================================

  with gr.Blocks(title="Z-Image-Turbo Generator") as demo:
+     gr.Markdown("# **🚀 Z-Image-Turbo — Final Image & Latents**")

      with gr.Row():
          with gr.Column(scale=1):
              prompt = gr.Textbox(label="Prompt", value="Realistic mid-aged male image")
              height = gr.Slider(256, 2048, value=1024, step=8, label="Height")
              width = gr.Slider(256, 2048, value=1024, step=8, label="Width")
+             steps = gr.Slider(1, 50, value=20, step=1, label="Inference Steps")
+             seed = gr.Number(value=42, precision=0, label="Seed")
+             run_btn = gr.Button("Generate Image")

          with gr.Column(scale=1):
+             final_image = gr.Image(label="Final Image")
+             latent_gallery = gr.Gallery(label="Latent Steps").style(grid=[4], height="256px")
+             logs_box = gr.Textbox(label="Logs", lines=15)

+     run_btn.click(
+         generate_image,
          inputs=[prompt, height, width, steps, seed],
+         outputs=[final_image, latent_gallery, logs_box]
      )

+
+ demo.launch()
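
One caveat for anyone reusing this file: `pipe(...).iter()` is not a documented diffusers interface, and recent diffusers releases replace the `callback`/`callback_steps` arguments with `callback_on_step_end`. A minimal sketch of the same per-step latent capture against that standard callback API; whether `ZImagePipeline` supports it is an assumption:

```python
# Capture per-step latents via the current diffusers callback mechanism.
latent_history = []

def grab_latents(pipeline, step, timestep, callback_kwargs):
    # callback_kwargs exposes the tensors named in callback_on_step_end_tensor_inputs
    latent_history.append(callback_kwargs["latents"].detach().clone())
    return callback_kwargs  # the callback must return the kwargs dict

output = pipe(
    prompt="Realistic mid-aged male image",
    num_inference_steps=9,
    guidance_scale=0.0,
    callback_on_step_end=grab_latents,
    callback_on_step_end_tensor_inputs=["latents"],
)
final_image = output.images[0]
```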