programmersd committed on
Commit 1e958eb · verified · 1 Parent(s): a467abb

Update app.py

Files changed (1)
app.py +159 -144
app.py CHANGED
@@ -1,194 +1,209 @@
 import os
 import gc
 import time
 import random
 import torch
 import gradio as gr
-
-# =====================================================
-# 🛠 CPU OPTIMIZED SETTINGS
-# =====================================================
-
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
-
-CPU_THREADS = min(4, os.cpu_count() or 1)
-os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
-os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
 os.environ["TRANSFORMERS_CACHE"] = "./hf_cache"
 os.environ["HF_DATASETS_CACHE"] = "./hf_cache"
 
 torch.set_num_threads(CPU_THREADS)
 torch.set_grad_enabled(False)
 
 DEVICE = "cpu"
 DTYPE = torch.float32
 CACHE_DIR = "./hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
 
-print("⚡ Z-Image Turbo CPU Optimized (Latest Docs)")
 
 try:
-    from huggingface_hub import hf_hub_download
-    from diffusers import (
-        ZImagePipeline,
-        ZImageTransformer2DModel,
-        GGUFQuantizationConfig,
-        AutoencoderKL,
-        FlowMatchEulerDiscreteScheduler
-    )
-    from transformers import CLIPTokenizer, CLIPTextModel
 except ImportError as e:
-    print(f"⚠️ Imports may not load: {e}")
 
 pipe = None
-_pipe_lock = False
 
-def load_text_encoder_min():
-    tokenizer = CLIPTokenizer.from_pretrained(
-        "openai/clip-vit-base-patch32", cache_dir=CACHE_DIR
-    )
-    text_encoder = CLIPTextModel.from_pretrained(
-        "openai/clip-vit-base-patch32",
-        cache_dir=CACHE_DIR,
-        torch_dtype=DTYPE,
-        low_cpu_mem_usage=True
-    )
-    return tokenizer, text_encoder
 
 def load_pipeline():
-    global pipe, _pipe_lock
-    if pipe is not None:
-        return pipe
-    if _pipe_lock:
-        raise gr.Error("Pipeline already loading…")
-    _pipe_lock = True
 
-    try:
-        scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
-            "Tongyi-MAI/Z-Image-Turbo",
-            subfolder="scheduler",
-            cache_dir=CACHE_DIR,
-            low_cpu_mem_usage=True
-        )
 
-        vae = AutoencoderKL.from_pretrained(
             "Tongyi-MAI/Z-Image-Turbo",
-            subfolder="vae",
-            cache_dir=CACHE_DIR,
             torch_dtype=DTYPE,
-            low_cpu_mem_usage=True
-        )
-
-        tokenizer, text_encoder = load_text_encoder_min()
-
-        gguf_path = hf_hub_download(
-            repo_id="unsloth/Z-Image-Turbo-GGUF",
-            filename="z-image-turbo-Q2_K.gguf",
             cache_dir=CACHE_DIR,
-            resume_download=True
-        )
-
-        transformer = ZImageTransformer2DModel.from_single_file(
-            gguf_path,
-            quantization_config=GGUFQuantizationConfig(compute_dtype=DTYPE),
-            torch_dtype=DTYPE,
             low_cpu_mem_usage=True
         )
 
-        pipe = ZImagePipeline(
-            vae=vae,
-            text_encoder=text_encoder,
-            tokenizer=tokenizer,
-            transformer=transformer,
-            scheduler=scheduler
-        ).to(DEVICE)
-
-        pipe.enable_attention_slicing()
-        pipe.enable_vae_slicing()
-        pipe.enable_vae_tiling()
-        pipe.set_progress_bar_config(disable=True)
-
         pipe.vae.eval()
         pipe.text_encoder.eval()
         pipe.transformer.eval()
 
         return pipe
 
-    except Exception as e:
-        raise gr.Error(f"Failed to load model: {e}")
-
-    finally:
-        _pipe_lock = False
-
 @torch.inference_mode()
-def generate(prompt, width, height, steps, seed, progress=gr.Progress()):
     if not prompt.strip():
-        raise gr.Error("Prompt required")
-
-    width = (max(256, min(int(width), 512)) // 64) * 64
-    height = (max(256, min(int(height), 512)) // 64) * 64
-    steps = max(1, min(int(steps), 4))
-
-    if seed < 0 or seed == "":
-        seed = random.randint(0, 2**31 - 1)
-    else:
-        seed = int(seed)
-
-    gc.collect()
-
-    pipe = load_pipeline()
-    generator = torch.Generator(device="cpu").manual_seed(seed)
-
-    start = time.time()
-    def callback(step, *_):
-        elapsed = time.time() - start
-        avg = elapsed / (step + 1)
-        remaining = avg * (steps - step - 1)
-        progress((step+1)/steps, desc=f"Step {step+1}/{steps} | ETA {remaining:.1f}s")
-
-    result = pipe(
-        prompt=prompt,
-        negative_prompt=None,
-        width=width,
-        height=height,
-        num_inference_steps=steps,
-        guidance_scale=1.0,
-        generator=generator,
-        callback=callback,
-        callback_steps=1,
-        output_type="pil"
-    )
-
-    image = result.images[0]
-    del result
-    gc.collect()
-    return image, seed
-
-with gr.Blocks() as demo:
-    gr.Markdown("# ⚡ Z-Image Turbo — CPU Optimized")
-
-    prompt = gr.Textbox(label="Prompt")
-    width = gr.Slider(256, 512, 256, step=64, label="Width")
-    height = gr.Slider(256, 512, 256, step=64, label="Height")
-    steps = gr.Slider(1, 4, 2, step=1, label="Steps")
-    seed = gr.Number(value=-1, precision=0, label="Seed (-1=random)")
-
-    btn = gr.Button("🚀 Generate")
-    output = gr.Image(label="Output")
-    used_seed = gr.Number(label="Seed Used", interactive=False)
-
-    btn.click(
-        generate,
-        inputs=[prompt, width, height, steps, seed],
-        outputs=[output, used_seed],
         concurrency_limit=1
     )
 
-# Enable queue with up to 2 pending jobs
-demo.queue(max_size=2)  # queues events per current Gradio docs
 
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)
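In the old file above, `_pipe_lock` is a plain boolean, which is a check-then-act race: two queued requests can both read `False` before either sets it, and both start loading the pipeline. The rewritten `load_pipeline()` below instead holds a real `threading.Lock` across the check and the load. A minimal standalone sketch of that lazy-singleton pattern, with `factory` as a hypothetical stand-in for the pipeline constructor:

from threading import Lock

_instance = None
_instance_lock = Lock()

def get_once(factory):
    # Hold the lock across the check *and* the construction, so concurrent
    # callers block until the first one finishes building the object.
    global _instance
    with _instance_lock:
        if _instance is None:
            _instance = factory()
    return _instance

The full updated app.py follows.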
 
 import os
 import gc
+import sys
 import time
 import random
 import torch
 import gradio as gr
+from threading import Lock
+from contextlib import contextmanager
+
+# --- LOGGING FOR UI ---
+LOG_BUFFER = []
+LOG_LOCK = Lock()
+
+def log(message):
+    print(message)
+    with LOG_LOCK:
+        LOG_BUFFER.append(f"{time.strftime('%H:%M:%S')} | {message}")
+        if len(LOG_BUFFER) > 500:
+            LOG_BUFFER.pop(0)
+    return "\n".join(LOG_BUFFER)
+
+# 🚀 Initialization log
+_initial_logs = log("🚀 Initializing Ultimate Z-Image Turbo CPU Edition...")
+
+# CPU THREAD OPTIMIZATION
+CPU_THREADS = min(8, os.cpu_count() or 1)
+os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["OPENBLAS_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["VECLIB_MAXIMUM_THREADS"] = str(CPU_THREADS)
+os.environ["NUMEXPR_NUM_THREADS"] = str(CPU_THREADS)
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
 os.environ["TRANSFORMERS_CACHE"] = "./hf_cache"
 os.environ["HF_DATASETS_CACHE"] = "./hf_cache"
 
 torch.set_num_threads(CPU_THREADS)
 torch.set_grad_enabled(False)
+torch.backends.mkldnn.enabled = True
+torch.backends.mkldnn.deterministic = False
+torch.set_flush_denormal(True)
+torch.set_float32_matmul_precision("medium")
 
 DEVICE = "cpu"
 DTYPE = torch.float32
 CACHE_DIR = "./hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
 
+log(f"⚡ CPU Threads: {CPU_THREADS}, Device: {DEVICE}, DType: {DTYPE}")
 
 try:
+    from diffusers import ZImagePipeline
+    log("📦 diffusers imported successfully")
 except ImportError as e:
+    log(f"❌ Import Error: {e}")
+    sys.exit(1)
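+# Note: ZImagePipeline ships only in recent diffusers builds; exiting here
+# fails fast instead of raising NameError at the first generation request.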
 
 pipe = None
+_pipe_lock = Lock()
+_generation_lock = Lock()
 
+@contextmanager
+def managed_memory():
+    try:
+        yield
+    finally:
+        gc.collect()
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
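+# The CUDA branch above is a no-op on this CPU-only Space; it is kept so the
+# helper stays correct if the app ever moves to a GPU box.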
 
 
 
 def load_pipeline():
+    global pipe
+    with _pipe_lock:
+        if pipe is not None:
+            return pipe
 
+        log("📦 Loading Z-Image Turbo pipeline...")
+        start_load = time.time()
 
+        pipe = ZImagePipeline.from_pretrained(
             "Tongyi-MAI/Z-Image-Turbo",
             torch_dtype=DTYPE,
             cache_dir=CACHE_DIR,
             low_cpu_mem_usage=True
         )
 
+        pipe = pipe.to(DEVICE)
         pipe.vae.eval()
         pipe.text_encoder.eval()
         pipe.transformer.eval()
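+        # Optional speed-up: mode="reduce-overhead" mainly targets GPU CUDA
+        # graphs, so on CPU any gain comes from Inductor's fused kernels, and
+        # the first generation pays the one-off compile latency.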
 
+        try:
+            pipe.transformer = torch.compile(
+                pipe.transformer,
+                mode="reduce-overhead",
+                fullgraph=False,
+                dynamic=False
+            )
+            log("✅ Transformer compiled successfully!")
+        except Exception as compile_error:
+            log(f"⚠️ torch.compile() failed: {compile_error}")
+
+        load_time = time.time() - start_load
+        log(f"✅ Pipeline loaded in {load_time:.2f}s")
         return pipe
 
 @torch.inference_mode()
+@torch.no_grad()
+def generate(prompt, quality_mode, seed, progress=gr.Progress()):
     if not prompt.strip():
+        raise gr.Error("🎯 Prompt cannot be empty!")
+
+    quality_settings = {
+        "ultra_fast": {"steps": 1, "width": 256, "height": 256},
+        "fast": {"steps": 1, "width": 256, "height": 256},
+        "balanced": {"steps": 2, "width": 256, "height": 256},
+        "quality": {"steps": 4, "width": 384, "height": 384},
+        "ultra_quality": {"steps": 4, "width": 512, "height": 512}
+    }
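+    # All modes stay at or below 512px and 4 steps: the Turbo checkpoint is
+    # tuned for very few denoising steps, and CPU latency grows with
+    # steps × pixels.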
+    settings = quality_settings.get(quality_mode, quality_settings["fast"])
+    steps, width, height = settings["steps"], settings["width"], settings["height"]
+
+    seed = int(seed) if seed >= 0 else random.randint(0, 2**31 - 1)
+    log(f"🎨 Generating: '{prompt[:50]}...' | Mode: {quality_mode} | {width}x{height} | Seed: {seed}")
+
+    with managed_memory():
+        with _generation_lock:
+            pipe = load_pipeline()
+            generator = torch.Generator("cpu").manual_seed(seed)
+            start_time = time.time()
+
+            def progress_callback(step, *_):
+                elapsed = time.time() - start_time
+                avg = elapsed / (step + 1) if step >= 0 else 0
+                remaining = avg * (steps - step - 1)
+                progress(
+                    (step + 1) / steps,
+                    desc=f"Step {step+1}/{steps} | ETA {remaining:.1f}s"
+                )
+
+            result = pipe(
+                prompt=prompt,
+                negative_prompt=None,
+                width=width,
+                height=height,
+                num_inference_steps=steps,
+                guidance_scale=0.0,
+                generator=generator,
+                callback=progress_callback,
+                callback_steps=1,
+                output_type="pil"
+            )
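+            # With guidance_scale at or below 1, diffusers pipelines skip
+            # classifier-free guidance, so each step runs the transformer once
+            # instead of a conditional+unconditional pair.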
+
+            image = result.images[0]
+            elapsed = time.time() - start_time
+            log(f"✅ Generated in {elapsed:.2f}s | Seed: {seed}")
+
+            del result
+            gc.collect()
+
+    return image, seed
+
+with gr.Blocks(title="🚀 Z-Image Turbo Pro Max + Live Logs") as demo:
+    gr.Markdown("## GPU-FREE CPU Turbo — Live Logs Below")
+
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(label="Prompt", lines=4)
+            quality_mode = gr.Radio(
+                choices=[
+                    ("Ultra Fast", "ultra_fast"),
+                    ("Fast", "fast"),
+                    ("Balanced", "balanced"),
+                    ("Quality", "quality"),
+                    ("Ultra Quality", "ultra_quality")
+                ],
+                value="fast",
+                label="Quality Mode"
+            )
+            seed = gr.Number(value=-1, precision=0, label="Seed")
+            generate_btn = gr.Button("GENERATE")
+        with gr.Column():
+            output_image = gr.Image(label="Output")
+            used_seed = gr.Number(label="Seed Used", interactive=False)
+            log_output = gr.Textbox(
+                label="Live System Log",
+                lines=15,
+                interactive=False
+            )
+
+    def wrapped_generate(prompt, quality_mode, seed):
+        image, used_seed = generate(prompt, quality_mode, seed)
+        logs = log("🧠 Latest status: Finished generation.")
+        return image, used_seed, logs
+
+    generate_btn.click(
+        wrapped_generate,
+        inputs=[prompt, quality_mode, seed],
+        outputs=[output_image, used_seed, log_output],
         concurrency_limit=1
     )
 
+demo.queue(max_size=3)
 
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)
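A quick way to exercise the running app from a script, sketched with gradio_client. It assumes the server above is reachable on localhost:7860 and that Gradio derived the endpoint name from `wrapped_generate`; `Client.view_api()` prints the actual endpoint and argument order if those assumptions don't hold.

from gradio_client import Client

client = Client("http://127.0.0.1:7860/")
# Arguments mirror wrapped_generate(prompt, quality_mode, seed).
image_path, used_seed, logs = client.predict(
    "a lighthouse at dawn, watercolor",
    "fast",
    -1,
    api_name="/wrapped_generate",
)
print(used_seed, image_path)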