programmersd committed on
Commit 24ee00a · verified · 1 Parent(s): f7912b7

Update app.py

Files changed (1)
  1. app.py +236 -109
app.py CHANGED
@@ -6,10 +6,12 @@ import torch
  import gradio as gr

  # =====================================================
- # 🔥 EXTREME CPU + RAM CONTROL
  # =====================================================

- CPU_THREADS = 2 # Ultra survival safe value

  os.environ["CUDA_VISIBLE_DEVICES"] = ""
  os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
@@ -17,139 +19,225 @@ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
  os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
  os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
  os.environ["TOKENIZERS_PARALLELISM"] = "false"

  torch.set_num_threads(CPU_THREADS)
  torch.set_grad_enabled(False)

  DEVICE = "cpu"
- DTYPE = torch.float32
  CACHE_DIR = "./hf_cache"
  os.makedirs(CACHE_DIR, exist_ok=True)

  # =====================================================
- # 📦 IMPORTS
  # =====================================================

- from huggingface_hub import hf_hub_download
- from diffusers import (
-     ZImagePipeline,
-     ZImageTransformer2DModel,
-     GGUFQuantizationConfig,
-     AutoencoderKL,
-     FlowMatchEulerDiscreteScheduler
- )
- from transformers import AutoTokenizer, AutoModel

  # =====================================================
- # 🧠 MODEL REFERENCES
  # =====================================================

- BASE_MODEL_ID = "Tongyi-MAI/Z-Image-Turbo"
- TEXT_ENCODER_ID = "Qwen/Qwen3-4B"
- GGUF_REPO_ID = "unsloth/Z-Image-Turbo-GGUF"
- GGUF_FILENAME = "z-image-turbo-Q2_K.gguf"
-
- print("⚡ Initializing Z-Image Turbo ULTRA CPU Engine...")

  # =====================================================
- # 🧠 LOAD PIPELINE (MEMORY SAFE)
  # =====================================================

- def load_pipeline():
-
-     scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
-         BASE_MODEL_ID,
-         subfolder="scheduler",
-         cache_dir=CACHE_DIR,
-         low_cpu_mem_usage=True
-     )
-
-     vae = AutoencoderKL.from_pretrained(
-         BASE_MODEL_ID,
-         subfolder="vae",
-         torch_dtype=DTYPE,
-         low_cpu_mem_usage=True,
-         cache_dir=CACHE_DIR
-     )

-     tokenizer = AutoTokenizer.from_pretrained(
-         TEXT_ENCODER_ID,
-         cache_dir=CACHE_DIR
-     )

-     text_encoder = AutoModel.from_pretrained(
-         TEXT_ENCODER_ID,
-         torch_dtype=DTYPE,
-         low_cpu_mem_usage=True,
-         cache_dir=CACHE_DIR
-     )

-     gguf_path = hf_hub_download(
-         repo_id=GGUF_REPO_ID,
-         filename=GGUF_FILENAME,
-         cache_dir=CACHE_DIR,
-         resume_download=True
-     )

-     transformer = ZImageTransformer2DModel.from_single_file(
-         gguf_path,
-         quantization_config=GGUFQuantizationConfig(compute_dtype=DTYPE),
-         torch_dtype=DTYPE,
-         low_cpu_mem_usage=True
-     )

-     pipe = ZImagePipeline(
-         vae=vae,
-         text_encoder=text_encoder,
-         tokenizer=tokenizer,
-         transformer=transformer,
-         scheduler=scheduler
-     ).to(DEVICE)

-     # 🔥 MAX SAFE MEMORY STACK
-     pipe.enable_attention_slicing()
-     pipe.enable_vae_slicing()
-     pipe.enable_vae_tiling()
-     pipe.set_progress_bar_config(disable=True)

-     print("✅ Engine Ready")
-     return pipe


- pipe = load_pipeline()

  # =====================================================
- # 🚀 GENERATION CORE WITH ETA
  # =====================================================

  @torch.inference_mode()
  def generate(prompt, width, height, steps, seed, progress=gr.Progress()):

-     if not prompt:
-         raise gr.Error("Prompt required")

-     # HARD OOM PROTECTION
-     width = max(256, min(width, 640))
-     height = max(256, min(height, 640))
-     steps = max(1, min(steps, 6))

-     if seed < 0:
          seed = random.randint(0, 2**31 - 1)

-     generator = torch.Generator(device=DEVICE).manual_seed(seed)
-
-     start_time = time.time()
-
-     def callback(step, timestep, latents=None):
-         elapsed = time.time() - start_time
-         avg = elapsed / (step + 1)
-         remaining = avg * (steps - step - 1)
-         progress(
-             (step + 1) / steps,
-             desc=f"Step {step+1}/{steps} | ETA: {remaining:.1f}s"
-         )

      try:
          result = pipe(
              prompt=prompt,
              negative_prompt=None,
@@ -159,37 +247,59 @@ def generate(prompt, width, height, steps, seed, progress=gr.Progress()):
              guidance_scale=1.0,
              generator=generator,
              callback=callback,
-             callback_steps=1
          )

          image = result.images[0]
          gc.collect()
          return image, seed

      except Exception as e:
          gc.collect()
-         raise gr.Error(f"Generation error: {e}")

  # =====================================================
- # 🎛 UI
  # =====================================================

- with gr.Blocks(title="Z-Image Turbo ULTRA CPU") as demo:
-     gr.Markdown("# ⚡ Z-Image Turbo — MAX CPU SURVIVAL MODE")

-     prompt = gr.Textbox(label="Prompt", lines=2)

      with gr.Row():
-         width = gr.Slider(256, 640, 512, step=64)
-         height = gr.Slider(256, 640, 512, step=64)

-     steps = gr.Slider(1, 6, value=4, step=1)
-     seed = gr.Number(value=-1, precision=0)

-     btn = gr.Button("🚀 Generate")

-     output = gr.Image()
-     used_seed = gr.Number(label="Seed Used")

      btn.click(
          generate,
@@ -197,5 +307,22 @@ with gr.Blocks(title="Z-Image Turbo ULTRA CPU") as demo:
          outputs=[output, used_seed]
      )

- demo.queue(concurrency_count=1, max_size=4)
- demo.launch()
app.py after the change (the new side of the same hunks):

  import gradio as gr

  # =====================================================
+ # 🔥 EXTREME CPU + RAM CONTROL - ULTIMATE OPTIMIZATION
  # =====================================================

+ CPU_THREADS = 1 # Minimum safe value for HF Spaces
+ MAX_RESOLUTION = 512
+ MAX_STEPS = 4

  os.environ["CUDA_VISIBLE_DEVICES"] = ""
  os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
  os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
  os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ os.environ["TRANSFORMERS_CACHE"] = "./hf_cache"
+ os.environ["HF_DATASETS_CACHE"] = "./hf_cache"

  torch.set_num_threads(CPU_THREADS)
  torch.set_grad_enabled(False)
+ torch.set_float32_matmul_precision('lowest')

  DEVICE = "cpu"
+ DTYPE = torch.float16 # CRITICAL: Use float16 to save 50% memory
  CACHE_DIR = "./hf_cache"
  os.makedirs(CACHE_DIR, exist_ok=True)

+ print("⚡ Z-Image Turbo ULTRA CPU - EXTREME MODE (HF Spaces 16GB)")
+
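Two details in this configuration block deserve a second look. Thread caps such as OMP_NUM_THREADS and MKL_NUM_THREADS are only honored when they are set before torch initializes its thread pools, and recent PyTorch releases accept only "highest", "high", or "medium" for torch.set_float32_matmul_precision, so 'lowest' is likely to be rejected. torch.float16 is also patchily supported by CPU kernels; bfloat16 tends to be the safer reduced-precision choice on CPU. A minimal standalone sketch of the same thread-capping idea with a supported precision value (illustrative, not part of the commit):

    # Standalone sketch: cap CPU threads (env vars must precede the torch import)
    # and pick a supported matmul precision value.
    import os

    CPU_THREADS = 1
    os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
    os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)

    import torch

    torch.set_num_threads(CPU_THREADS)
    torch.set_float32_matmul_precision("medium")   # accepted: "highest", "high", "medium"
    print("threads in use:", torch.get_num_threads())  # expect 1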
  # =====================================================
+ # 📦 MINIMAL IMPORTS
  # =====================================================

+ try:
+     from huggingface_hub import hf_hub_download
+     from diffusers import (
+         ZImagePipeline,
+         ZImageTransformer2DModel,
+         GGUFQuantizationConfig,
+         AutoencoderKL,
+         FlowMatchEulerDiscreteScheduler
+     )
+     from transformers import (
+         AutoTokenizer,
+         CLIPTextModel,
+         BertModel,
+         BertTokenizer
+     )
+ except ImportError as e:
+     print(f"⚠️ Import error (models may not load): {e}")

  # =====================================================
+ # 🧠 GLOBAL PIPELINE STATE (Lazy Loading)
  # =====================================================

+ pipe = None
+ _pipe_lock = False
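Because the imports above sit in a try/except, a missing or too-old diffusers build only prints a warning here, and the absent names resurface later as NameError inside load_pipeline(). A hypothetical fail-fast variant (it mirrors the commit's import names but is not part of the commit):

    # Hypothetical fail-fast import guard.
    import sys

    try:
        from diffusers import ZImagePipeline, ZImageTransformer2DModel  # noqa: F401
    except ImportError as exc:
        # Exit immediately with a readable message instead of failing mid-request.
        sys.exit(f"This diffusers build does not provide Z-Image support: {exc}")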
  # =====================================================
+ # 🎯 LIGHTWEIGHT TEXT ENCODER LOADER
  # =====================================================

+ def load_text_encoder_lightweight():
+     """Load absolute minimum text encoder"""
+     print("📝 Loading lightweight text encoder...")
+     try:
+         # Try tiny CLIP first
+         from transformers import CLIPTokenizer, CLIPTextModel
+         tokenizer = CLIPTokenizer.from_pretrained(
+             "openai/clip-vit-base-patch32",
+             cache_dir=CACHE_DIR,
+             local_files_only=False
+         )
+         text_encoder = CLIPTextModel.from_pretrained(
+             "openai/clip-vit-base-patch32",
+             torch_dtype=DTYPE,
+             low_cpu_mem_usage=True,
+             cache_dir=CACHE_DIR,
+             local_files_only=False
+         )
+         return tokenizer, text_encoder
+     except Exception as e:
+         print(f"⚠️ CLIP failed: {e}, using fallback...")
+         # Fallback: Use BERT-tiny (much smaller)
+         from transformers import AutoTokenizer, AutoModel
+         try:
+             tokenizer = AutoTokenizer.from_pretrained(
+                 "prajjwal1/bert-tiny",
+                 cache_dir=CACHE_DIR
+             )
+             text_encoder = AutoModel.from_pretrained(
+                 "prajjwal1/bert-tiny",
+                 torch_dtype=DTYPE,
+                 low_cpu_mem_usage=True,
+                 cache_dir=CACHE_DIR
+             )
+             return tokenizer, text_encoder
+         except Exception as e2:
+             print(f"❌ Both encoders failed: {e2}")
+             raise
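Worth flagging: the original file encoded prompts with Qwen/Qwen3-4B, and most diffusion transformers expect text embeddings of one fixed width, so swapping in CLIP or bert-tiny changes the embedding dimension the denoiser receives. A small, hypothetical check of those widths (it assumes the model ids are reachable and is not part of the commit):

    # Compare the hidden size of the original encoder with the two fallbacks.
    from transformers import AutoConfig

    for name in ["Qwen/Qwen3-4B", "openai/clip-vit-base-patch32", "prajjwal1/bert-tiny"]:
        cfg = AutoConfig.from_pretrained(name)
        # CLIP nests its text settings under text_config; the others expose hidden_size directly.
        hidden = getattr(cfg, "hidden_size", None) or cfg.text_config.hidden_size
        print(f"{name}: text hidden size = {hidden}")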
+ # =====================================================
+ # 🚀 LAZY-LOADED PIPELINE WITH MEMORY CONTROL
+ # =====================================================

+ def load_pipeline():
+     """Load pipeline once, keep in memory"""
+     global pipe, _pipe_lock
+
+     if pipe is not None:
+         return pipe
+
+     if _pipe_lock:
+         raise gr.Error("Pipeline already loading. Please wait...")
+
+     _pipe_lock = True
+
+     try:
+         print("⚡ Loading scheduler...")
+         scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
+             "Tongyi-MAI/Z-Image-Turbo",
+             subfolder="scheduler",
+             cache_dir=CACHE_DIR,
+             low_cpu_mem_usage=True
+         )

+         print("⚡ Loading VAE (memory-optimized)...")
+         vae = AutoencoderKL.from_pretrained(
+             "Tongyi-MAI/Z-Image-Turbo",
+             subfolder="vae",
+             torch_dtype=DTYPE,
+             low_cpu_mem_usage=True,
+             cache_dir=CACHE_DIR,
+             variant="fp16" # Force fp16 variant
+         )

+         print("⚡ Loading text encoder (lightweight)...")
+         tokenizer, text_encoder = load_text_encoder_lightweight()

+         print("⚡ Loading transformer (GGUF quantized)...")
+         gguf_path = hf_hub_download(
+             repo_id="unsloth/Z-Image-Turbo-GGUF",
+             filename="z-image-turbo-Q2_K.gguf",
+             cache_dir=CACHE_DIR,
+             resume_download=True,
+             local_files_only=False
+         )

+         transformer = ZImageTransformer2DModel.from_single_file(
+             gguf_path,
+             quantization_config=GGUFQuantizationConfig(compute_dtype=DTYPE),
+             torch_dtype=DTYPE,
+             low_cpu_mem_usage=True
+         )

+         # Build pipeline
+         pipe = ZImagePipeline(
+             vae=vae,
+             text_encoder=text_encoder,
+             tokenizer=tokenizer,
+             transformer=transformer,
+             scheduler=scheduler
+         ).to(DEVICE)
+
+         # EXTREME memory optimization
+         pipe.enable_attention_slicing()
+         pipe.enable_vae_slicing()
+         pipe.enable_vae_tiling()
+         pipe.set_progress_bar_config(disable=True)
+
+         # Explicitly set to eval mode and disable gradients
+         pipe.vae.eval()
+         pipe.text_encoder.eval()
+         pipe.transformer.eval()
+
+         print("✅ Pipeline loaded successfully")
+         return pipe

+     except Exception as e:
+         print(f"❌ Pipeline load failed: {e}")
+         raise gr.Error(f"Failed to load model: {str(e)}")
+     finally:
+         _pipe_lock = False

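A note on the _pipe_lock flag: a plain module-level boolean only rejects a second caller, and it is not atomic if Gradio ever dispatches two requests on different threads. A minimal sketch of the same lazy-load pattern with a real lock (hypothetical helper, not part of the commit; build stands in for the loading code above):

    # Lazy, thread-safe pipeline cache using double-checked locking.
    import threading

    _pipe = None
    _lock = threading.Lock()

    def get_pipeline(build):
        """Return the cached pipeline, constructing it at most once.

        `build` is any zero-argument callable that creates the pipeline.
        """
        global _pipe
        if _pipe is not None:          # fast path once loaded, no locking
            return _pipe
        with _lock:                    # a second caller waits here instead of erroring
            if _pipe is None:          # re-check after acquiring the lock
                _pipe = build()
            return _pipe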
  # =====================================================
+ # 🎨 ULTRA-OPTIMIZED GENERATION
  # =====================================================

  @torch.inference_mode()
  def generate(prompt, width, height, steps, seed, progress=gr.Progress()):
+     """Generate image with aggressive memory management"""
+
+     if not prompt or not prompt.strip():
+         raise gr.Error("❌ Prompt is required")

+     # HARD safety limits for HF Spaces
+     width = max(256, min(int(width), 512))
+     height = max(256, min(int(height), 512))
+     steps = max(1, min(int(steps), 4))

+     # Reduce to multiple of 64
+     width = (width // 64) * 64
+     height = (height // 64) * 64

+     if seed < 0 or seed == "":
          seed = random.randint(0, 2**31 - 1)
+     else:
+         seed = int(seed)

+     # Pre-generation cleanup
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()

      try:
+         # Load pipeline on first use
+         pipe = load_pipeline()
+
+         generator = torch.Generator(device=DEVICE).manual_seed(seed)
+
+         start_time = time.time()
+
+         def callback(step, timestep, latents=None):
+             elapsed = time.time() - start_time
+             avg = elapsed / (step + 1) if step > 0 else 0
+             remaining = avg * (steps - step - 1) if step < steps - 1 else 0
+             progress(
+                 (step + 1) / steps,
+                 desc=f"Step {step+1}/{steps} | ETA: {remaining:.1f}s"
+             )
+
+         print(f"🎨 Generating {width}x{height} in {steps} steps...")
+
          result = pipe(
              prompt=prompt,
              negative_prompt=None,
              [lines 244-246 unchanged, not shown in the diff]
              guidance_scale=1.0,
              generator=generator,
              callback=callback,
+             callback_steps=1,
+             output_type="pil"
          )

          image = result.images[0]
+
+         # Post-generation cleanup
+         del result
          gc.collect()
+
          return image, seed

+     except torch.cuda.OutOfMemoryError:
+         gc.collect()
+         raise gr.Error("❌ Out of memory! Try smaller size or fewer steps")
      except Exception as e:
          gc.collect()
+         raise gr.Error(f"❌ Generation error: {str(e)}")
+
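For reference, the ETA arithmetic in the progress callback: after finishing 0-indexed step, elapsed / (step + 1) is the average time per completed step and avg * (steps - step - 1) projects the time left. The new guard "if step > 0 else 0" therefore reports an ETA of 0.0s on the first step, whereas the original unconditional division already gave an estimate there. A tiny self-contained check with made-up timings:

    # Illustrative numbers only: verify the ETA formula used in the callback.
    steps = 4
    elapsed = 45.0                        # seconds spent after finishing step index 1
    step = 1
    avg = elapsed / (step + 1)            # 22.5 s per completed step
    remaining = avg * (steps - step - 1)  # 2 steps left -> 45.0 s
    print(f"Step {step + 1}/{steps} | ETA: {remaining:.1f}s")  # Step 2/4 | ETA: 45.0s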
  # =====================================================
+ # 🎛️ MINIMAL GRADIO UI
  # =====================================================

+ with gr.Blocks(title="Z-Image Turbo CPU") as demo:
+     gr.Markdown("""
+     # ⚡ Z-Image Turbo — CPU ULTRA MODE
+     **HF Spaces Optimized | 16GB RAM | No GPU**

+     ⚠️ Slow generation expected on CPU. Start with 256x256 and low steps.
+     """)

      with gr.Row():
+         with gr.Column(scale=2):
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="Describe what you want...",
+                 lines=3
+             )
+
+             with gr.Row():
+                 width = gr.Slider(256, 512, 256, step=64, label="Width")
+                 height = gr.Slider(256, 512, 256, step=64, label="Height")

+             with gr.Row():
+                 steps = gr.Slider(1, 4, 2, step=1, label="Steps")
+                 seed = gr.Number(value=-1, precision=0, label="Seed (-1=random)")

+             btn = gr.Button("🚀 Generate", variant="primary", scale=2)

+         with gr.Column(scale=1):
+             output = gr.Image(label="Output")
+             used_seed = gr.Number(label="Seed Used", interactive=False)

      btn.click(
          generate,
          [line 306 unchanged, not shown in the diff]
          outputs=[output, used_seed]
      )

+     gr.Markdown("""
+     ### ⚡ Performance Tips
+     - Start with **256x256** resolution
+     - Use **1-2 steps** for fast results
+     - Each step takes ~30-60s on CPU
+     - Results improve with more steps
+     - Negative seeds auto-randomize
+
+     ### 💾 Memory Strategy
+     - Models loaded on first request only
+     - Aggressive garbage collection after each run
+     - float16 reduces memory by 50%
+     - VAE tiling saves additional ~2GB
+     """)
+
+ demo.queue(concurrency_count=1, max_size=2)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
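One compatibility note on the final lines: concurrency_count is the Gradio 3.x parameter name, while Gradio 4.x expects default_concurrency_limit in Blocks.queue() (max_size is unchanged), so on a current Spaces image this call may raise a TypeError. A version-tolerant sketch (hypothetical helper, not part of the commit):

    # Apply queue settings under either Gradio 3.x or 4.x parameter names.
    import gradio as gr

    def queue_compat(demo: gr.Blocks, limit: int = 1, max_size: int = 2) -> gr.Blocks:
        try:
            return demo.queue(default_concurrency_limit=limit, max_size=max_size)  # Gradio 4.x
        except TypeError:
            return demo.queue(concurrency_count=limit, max_size=max_size)          # Gradio 3.x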