rahul7star committed
Commit dc103ee · verified · 1 Parent(s): 5ecec1f

Update app1.py

Files changed (1)
  1. app1.py +16 -19
app1.py CHANGED
@@ -2,7 +2,7 @@ import torch
 import spaces
 import gradio as gr
 from diffusers import DiffusionPipeline
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import pipeline
 import diffusers
 import io
 
@@ -18,30 +18,21 @@ def log(msg):
 # Enable diffusers debug logs
 diffusers.utils.logging.set_verbosity_info()
 
-log("Loading Z-Image-Turbo pipeline...")
-
-# ------------------------
-# Load FP8 text encoder
-# ------------------------
-log("Loading FP8 text encoder: Qwen/Qwen3-VL-2B-Instruct-FP8...")
-fp8_model_name = "Qwen/Qwen3-VL-2B-Instruct-FP8"
-fp8_tokenizer = AutoTokenizer.from_pretrained(fp8_model_name)
-fp8_encoder = AutoModelForCausalLM.from_pretrained(fp8_model_name, torch_dtype=torch.bfloat16)
-fp8_encoder.to("cuda")
-
 # ------------------------
-# Load main diffusion pipeline
+# LOAD PIPELINES
 # ------------------------
+log("Loading Z-Image-Turbo pipeline...")
 pipe = DiffusionPipeline.from_pretrained(
     "Tongyi-MAI/Z-Image-Turbo",
-    torch_dtype=torch.bfloat16,
+    dtype=torch.bfloat16,
     low_cpu_mem_usage=False,
     attn_implementation="kernels-community/vllm-flash-attn3",
-    text_encoder=fp8_encoder,  # swap in FP8 encoder
-    tokenizer=fp8_tokenizer,   # swap tokenizer
 )
 pipe.to("cuda")
 
+log("Loading FP8 text encoder: Qwen/Qwen3-4B...")
+fp8_encoder = pipeline("text-generation", model="Qwen/Qwen3-4B", device=0)  # device=0 → CUDA
+
 # ------------------------
 # PIPELINE DEBUG INFO
 # ------------------------
@@ -98,14 +89,20 @@ def generate_image(prompt, height, width, num_inference_steps, seed, randomize_s
     else:
         log(f"Seed: {seed}")
 
+    # Clamp images to 1–3
     num_images = min(max(1, int(num_images)), 3)
 
-    # Pipeline debug info
+    # Run FP8 text encoder first
+    log("Encoding prompt with FP8 text encoder...")
+    encoded_prompt = fp8_encoder([{"role": "user", "content": prompt}])
+    log(f"FP8 encoding output: {encoded_prompt}")
+
+    # Debug pipeline info
     log(pipeline_debug_info(pipe))
 
     generator = torch.Generator("cuda").manual_seed(int(seed))
 
-    log("Running pipeline forward()...")
+    log("Running Z-Image-Turbo pipeline forward()...")
     result = pipe(
         prompt=prompt,
         height=int(height),
@@ -142,7 +139,7 @@ examples = [
     ["Portrait of a wise old wizard..."],
 ]
 
-with gr.Blocks(title="Z-Image-Turbo Multi Image Demo (FP8 Text Encoder)") as demo:
+with gr.Blocks(title="Z-Image-Turbo Multi Image Demo") as demo:
    gr.Markdown("# 🎨 Z-Image-Turbo — Multi Image (FP8 Text Encoder)")
 
    with gr.Row():
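
For context, a minimal, self-contained sketch of the flow this commit sets up, assuming a recent diffusers release (which accepts the dtype keyword used above) and a transformers version whose text-generation pipeline accepts chat-style message lists. In the committed code the Qwen output is only logged and pipe still receives the raw prompt; the last two lines below show one hypothetical way the generated text could be wired in instead. The example prompt, step count, and output file name are illustrative, not taken from the commit.

import torch
from diffusers import DiffusionPipeline
from transformers import pipeline

# Z-Image-Turbo in bf16 on the GPU, as in the commit.
pipe = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    dtype=torch.bfloat16,
)
pipe.to("cuda")

# The commit's "FP8 text encoder" is an ordinary text-generation
# pipeline; device=0 puts it on the first CUDA device.
fp8_encoder = pipeline("text-generation", model="Qwen/Qwen3-4B", device=0)

# Chat-style call: "generated_text" comes back as the message list
# with the assistant reply appended at the end.
prompt = "a red fox in the snow"  # illustrative prompt
out = fp8_encoder([{"role": "user", "content": prompt}])
rewritten = out[0]["generated_text"][-1]["content"]

# Hypothetical wiring (not in the commit): use the Qwen output as
# the prompt for image generation.
image = pipe(prompt=rewritten, num_inference_steps=8).images[0]  # step count is an assumption
image.save("fox.png")

Note that "FP8 text encoder" is the app's own label here: Qwen/Qwen3-4B is loaded as an ordinary text-generation model, and prompt conditioning still runs through the stock Z-Image-Turbo text encoder inside pipe.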