rahul7star committed
Commit 5ecec1f · verified · Parent(s): 551f60a

Update app1.py

Files changed (1): app1.py (+18 -29)
app1.py CHANGED
@@ -2,7 +2,7 @@ import torch
  import spaces
  import gradio as gr
  from diffusers import DiffusionPipeline
- from transformers import AutoConfig, AutoModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  import diffusers
  import io

@@ -20,43 +20,33 @@ diffusers.utils.logging.set_verbosity_info()

  log("Loading Z-Image-Turbo pipeline...")

- # Load main pipeline
+ # ------------------------
+ # Load FP8 text encoder
+ # ------------------------
+ log("Loading FP8 text encoder: Qwen/Qwen3-VL-2B-Instruct-FP8...")
+ fp8_model_name = "Qwen/Qwen3-VL-2B-Instruct-FP8"
+ fp8_tokenizer = AutoTokenizer.from_pretrained(fp8_model_name)
+ fp8_encoder = AutoModelForCausalLM.from_pretrained(fp8_model_name, torch_dtype=torch.bfloat16)
+ fp8_encoder.to("cuda")
+
+ # ------------------------
+ # Load main diffusion pipeline
+ # ------------------------
  pipe = DiffusionPipeline.from_pretrained(
      "Tongyi-MAI/Z-Image-Turbo",
      torch_dtype=torch.bfloat16,
      low_cpu_mem_usage=False,
      attn_implementation="kernels-community/vllm-flash-attn3",
+     text_encoder=fp8_encoder,  # swap in FP8 encoder
+     tokenizer=fp8_tokenizer,   # swap tokenizer
  )
  pipe.to("cuda")

- # ------------------------
- # Load FP8 Text Encoder
- # ------------------------
- log("Loading FP8 text encoder...")
- from safetensors.torch import load_file
- import torch
- from transformers import AutoConfig
- from qwen.modeling_qwen import QwenForCausalLM  # ensure Qwen3 model class is imported
-
- # Load config
- config = AutoConfig.from_pretrained("jiangchengchengNLP/qwen3-4b-fp8-scaled")
-
- # Initialize model
- fp8_encoder = QwenForCausalLM(config)
-
- # Load weights manually
- weights = load_file("qwen3_4b_fp8_scaled.safetensors")
- fp8_encoder.load_state_dict(weights, strict=False)
-
- fp8_encoder.to("cuda")
-
-
  # ------------------------
  # PIPELINE DEBUG INFO
  # ------------------------
  def pipeline_debug_info(pipe):
-     info = []
-     info.append("=== PIPELINE DEBUG INFO ===")
+     info = ["=== PIPELINE DEBUG INFO ==="]
      try:
          tr = pipe.transformer.config
          info.append(f"Transformer Class: {pipe.transformer.__class__.__name__}")
@@ -108,10 +98,9 @@ def generate_image(prompt, height, width, num_inference_steps, seed, randomize_s
      else:
          log(f"Seed: {seed}")

-     # Clamp images to 1–3
      num_images = min(max(1, int(num_images)), 3)

-     # Debug pipeline info
+     # Pipeline debug info
      log(pipeline_debug_info(pipe))

      generator = torch.Generator("cuda").manual_seed(int(seed))
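
Side note on the seed handling above (illustration only, not part of the commit): the manually seeded CUDA generator is what makes generation reproducible, because identically seeded generators draw identical initial noise:

    import torch

    # Identically seeded generators yield identical latents, so the same
    # seed and prompt reproduce the same images.
    g1 = torch.Generator("cuda").manual_seed(42)
    g2 = torch.Generator("cuda").manual_seed(42)
    assert torch.equal(torch.randn(4, generator=g1, device="cuda"),
                       torch.randn(4, generator=g2, device="cuda"))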
@@ -153,7 +142,7 @@ examples = [
      ["Portrait of a wise old wizard..."],
  ]

- with gr.Blocks(title="Z-Image-Turbo Multi Image Demo") as demo:
+ with gr.Blocks(title="Z-Image-Turbo Multi Image Demo (FP8 Text Encoder)") as demo:
      gr.Markdown("# 🎨 Z-Image-Turbo — Multi Image (FP8 Text Encoder)")

      with gr.Row():
 
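For reference, a minimal sketch of driving the updated pipeline directly, outside the Gradio UI. The keyword arguments mirror the generate_image signature in app1.py; the resolution and step count are illustrative guesses for a few-step Turbo model, not values taken from this commit:

    import torch

    generator = torch.Generator("cuda").manual_seed(42)
    result = pipe(
        prompt="Portrait of a wise old wizard...",
        height=1024,               # illustrative resolution
        width=1024,
        num_inference_steps=8,     # illustrative few-step setting
        generator=generator,
    )
    result.images[0].save("wizard.png")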