Spaces:
Running
on
Zero
Running
on
Zero
Update app1.py
Browse files
app1.py
CHANGED
|
@@ -2,10 +2,9 @@ import torch
|
|
| 2 |
import spaces
|
| 3 |
import gradio as gr
|
| 4 |
from diffusers import DiffusionPipeline
|
|
|
|
| 5 |
import diffusers
|
| 6 |
import io
|
| 7 |
-
import logging
|
| 8 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 9 |
|
| 10 |
# ------------------------
|
| 11 |
# GLOBAL LOG BUFFER
|
|
@@ -21,45 +20,39 @@ diffusers.utils.logging.set_verbosity_info()
|
|
| 21 |
|
| 22 |
log("Loading Z-Image-Turbo pipeline...")
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
# Load FP8 text encoder + tokenizer
|
| 26 |
-
# ------------------------
|
| 27 |
-
log("Loading FP8 Qwen3-4B tokenizer + text encoder...")
|
| 28 |
-
fp8_tokenizer = AutoTokenizer.from_pretrained(
|
| 29 |
-
"jiangchengchengNLP/qwen3-4b-fp8-scaled"
|
| 30 |
-
)
|
| 31 |
-
fp8_text_encoder = AutoModelForCausalLM.from_pretrained(
|
| 32 |
-
"jiangchengchengNLP/qwen3-4b-fp8-scaled",
|
| 33 |
-
device_map="auto",
|
| 34 |
-
torch_dtype=torch.bfloat16, # can replace with torch.float8_e4m3fn if PyTorch nightly supports
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
# ------------------------
|
| 38 |
-
# Load Z-Image-Turbo
|
| 39 |
-
# ------------------------
|
| 40 |
pipe = DiffusionPipeline.from_pretrained(
|
| 41 |
"Tongyi-MAI/Z-Image-Turbo",
|
| 42 |
torch_dtype=torch.bfloat16,
|
| 43 |
low_cpu_mem_usage=False,
|
| 44 |
attn_implementation="kernels-community/vllm-flash-attn3",
|
| 45 |
)
|
| 46 |
-
|
| 47 |
-
# Inject FP8 tokenizer + text encoder
|
| 48 |
-
pipe.tokenizer = fp8_tokenizer
|
| 49 |
-
pipe.text_encoder = fp8_text_encoder
|
| 50 |
pipe.to("cuda")
|
| 51 |
|
| 52 |
# ------------------------
|
| 53 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
# ------------------------
|
| 55 |
def pipeline_debug_info(pipe):
|
| 56 |
info = []
|
| 57 |
info.append("=== PIPELINE DEBUG INFO ===")
|
| 58 |
-
|
| 59 |
try:
|
| 60 |
tr = pipe.transformer.config
|
| 61 |
info.append(f"Transformer Class: {pipe.transformer.__class__.__name__}")
|
| 62 |
-
# Z-Image-Turbo keys
|
| 63 |
info.append(f"Hidden dim: {tr.get('hidden_dim')}")
|
| 64 |
info.append(f"Attention heads: {tr.get('num_heads')}")
|
| 65 |
info.append(f"Depth (layers): {tr.get('depth')}")
|
|
@@ -69,7 +62,6 @@ def pipeline_debug_info(pipe):
|
|
| 69 |
except Exception as e:
|
| 70 |
info.append(f"Transformer diagnostics failed: {e}")
|
| 71 |
|
| 72 |
-
# VAE info
|
| 73 |
try:
|
| 74 |
vae = pipe.vae.config
|
| 75 |
info.append(f"VAE latent channels: {vae.latent_channels}")
|
|
@@ -79,7 +71,6 @@ def pipeline_debug_info(pipe):
|
|
| 79 |
|
| 80 |
return "\n".join(info)
|
| 81 |
|
| 82 |
-
|
| 83 |
def latent_shape_info(h, w, pipe):
|
| 84 |
try:
|
| 85 |
c = pipe.vae.config.latent_channels
|
|
@@ -90,9 +81,8 @@ def latent_shape_info(h, w, pipe):
|
|
| 90 |
except Exception as e:
|
| 91 |
return f"Latent shape calc failed: {e}"
|
| 92 |
|
| 93 |
-
|
| 94 |
# ------------------------
|
| 95 |
-
# IMAGE
|
| 96 |
# ------------------------
|
| 97 |
@spaces.GPU
|
| 98 |
def generate_image(prompt, height, width, num_inference_steps, seed, randomize_seed, num_images):
|
|
@@ -111,6 +101,7 @@ def generate_image(prompt, height, width, num_inference_steps, seed, randomize_s
|
|
| 111 |
else:
|
| 112 |
log(f"Seed: {seed}")
|
| 113 |
|
|
|
|
| 114 |
num_images = min(max(1, int(num_images)), 3)
|
| 115 |
|
| 116 |
# Debug pipeline info
|
|
@@ -131,7 +122,7 @@ def generate_image(prompt, height, width, num_inference_steps, seed, randomize_s
|
|
| 131 |
output_type="pil",
|
| 132 |
)
|
| 133 |
|
| 134 |
-
#
|
| 135 |
try:
|
| 136 |
log(f"VAE latent channels: {pipe.vae.config.latent_channels}")
|
| 137 |
log(f"VAE scaling factor: {pipe.vae.config.scaling_factor}")
|
|
@@ -144,7 +135,6 @@ def generate_image(prompt, height, width, num_inference_steps, seed, randomize_s
|
|
| 144 |
|
| 145 |
return result.images, seed, log_buffer.getvalue()
|
| 146 |
|
| 147 |
-
|
| 148 |
# ------------------------
|
| 149 |
# GRADIO UI
|
| 150 |
# ------------------------
|
|
@@ -157,7 +147,7 @@ examples = [
|
|
| 157 |
]
|
| 158 |
|
| 159 |
with gr.Blocks(title="Z-Image-Turbo Multi Image Demo") as demo:
|
| 160 |
-
gr.Markdown("# 🎨 Z-Image-Turbo — Multi Image ")
|
| 161 |
|
| 162 |
with gr.Row():
|
| 163 |
with gr.Column(scale=1):
|
|
@@ -181,7 +171,7 @@ with gr.Blocks(title="Z-Image-Turbo Multi Image Demo") as demo:
|
|
| 181 |
generate_btn = gr.Button("🚀 Generate", variant="primary")
|
| 182 |
|
| 183 |
with gr.Column(scale=1):
|
| 184 |
-
output_images = gr.Gallery(label="Generated Images")
|
| 185 |
used_seed = gr.Number(label="Seed Used", interactive=False)
|
| 186 |
debug_log = gr.Textbox(label="Debug Log Output", lines=25, interactive=False)
|
| 187 |
|
|
|
|
| 2 |
import spaces
|
| 3 |
import gradio as gr
|
| 4 |
from diffusers import DiffusionPipeline
|
| 5 |
+
from transformers import AutoConfig, AutoModel
|
| 6 |
import diffusers
|
| 7 |
import io
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# ------------------------
|
| 10 |
# GLOBAL LOG BUFFER
|
|
|
|
| 20 |
|
| 21 |
log("Loading Z-Image-Turbo pipeline...")
|
| 22 |
|
| 23 |
+
# Load main pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
pipe = DiffusionPipeline.from_pretrained(
|
| 25 |
"Tongyi-MAI/Z-Image-Turbo",
|
| 26 |
torch_dtype=torch.bfloat16,
|
| 27 |
low_cpu_mem_usage=False,
|
| 28 |
attn_implementation="kernels-community/vllm-flash-attn3",
|
| 29 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
pipe.to("cuda")
|
| 31 |
|
| 32 |
# ------------------------
|
| 33 |
+
# Load FP8 Text Encoder
|
| 34 |
+
# ------------------------
|
| 35 |
+
log("Loading FP8 text encoder...")
|
| 36 |
+
config = AutoConfig.from_pretrained("jiangchengchengNLP/qwen3-4b-fp8-scaled")
|
| 37 |
+
fp8_encoder = AutoModel.from_pretrained(
|
| 38 |
+
"jiangchengchengNLP/qwen3-4b-fp8-scaled",
|
| 39 |
+
config=config,
|
| 40 |
+
device_map="auto",
|
| 41 |
+
torch_dtype=torch.bfloat16,
|
| 42 |
+
)
|
| 43 |
+
pipe.text_encoder = fp8_encoder
|
| 44 |
+
log("FP8 text encoder loaded successfully.")
|
| 45 |
+
log(f"Tokenizer remains: {pipe.tokenizer.__class__.__name__}")
|
| 46 |
+
|
| 47 |
+
# ------------------------
|
| 48 |
+
# PIPELINE DEBUG INFO
|
| 49 |
# ------------------------
|
| 50 |
def pipeline_debug_info(pipe):
|
| 51 |
info = []
|
| 52 |
info.append("=== PIPELINE DEBUG INFO ===")
|
|
|
|
| 53 |
try:
|
| 54 |
tr = pipe.transformer.config
|
| 55 |
info.append(f"Transformer Class: {pipe.transformer.__class__.__name__}")
|
|
|
|
| 56 |
info.append(f"Hidden dim: {tr.get('hidden_dim')}")
|
| 57 |
info.append(f"Attention heads: {tr.get('num_heads')}")
|
| 58 |
info.append(f"Depth (layers): {tr.get('depth')}")
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
info.append(f"Transformer diagnostics failed: {e}")
|
| 64 |
|
|
|
|
| 65 |
try:
|
| 66 |
vae = pipe.vae.config
|
| 67 |
info.append(f"VAE latent channels: {vae.latent_channels}")
|
|
|
|
| 71 |
|
| 72 |
return "\n".join(info)
|
| 73 |
|
|
|
|
| 74 |
def latent_shape_info(h, w, pipe):
|
| 75 |
try:
|
| 76 |
c = pipe.vae.config.latent_channels
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
return f"Latent shape calc failed: {e}"
|
| 83 |
|
|
|
|
| 84 |
# ------------------------
|
| 85 |
+
# IMAGE GENERATION
|
| 86 |
# ------------------------
|
| 87 |
@spaces.GPU
|
| 88 |
def generate_image(prompt, height, width, num_inference_steps, seed, randomize_seed, num_images):
|
|
|
|
| 101 |
else:
|
| 102 |
log(f"Seed: {seed}")
|
| 103 |
|
| 104 |
+
# Clamp images to 1–3
|
| 105 |
num_images = min(max(1, int(num_images)), 3)
|
| 106 |
|
| 107 |
# Debug pipeline info
|
|
|
|
| 122 |
output_type="pil",
|
| 123 |
)
|
| 124 |
|
| 125 |
+
# Latent diagnostics
|
| 126 |
try:
|
| 127 |
log(f"VAE latent channels: {pipe.vae.config.latent_channels}")
|
| 128 |
log(f"VAE scaling factor: {pipe.vae.config.scaling_factor}")
|
|
|
|
| 135 |
|
| 136 |
return result.images, seed, log_buffer.getvalue()
|
| 137 |
|
|
|
|
| 138 |
# ------------------------
|
| 139 |
# GRADIO UI
|
| 140 |
# ------------------------
|
|
|
|
| 147 |
]
|
| 148 |
|
| 149 |
with gr.Blocks(title="Z-Image-Turbo Multi Image Demo") as demo:
|
| 150 |
+
gr.Markdown("# 🎨 Z-Image-Turbo — Multi Image (FP8 Text Encoder)")
|
| 151 |
|
| 152 |
with gr.Row():
|
| 153 |
with gr.Column(scale=1):
|
|
|
|
| 171 |
generate_btn = gr.Button("🚀 Generate", variant="primary")
|
| 172 |
|
| 173 |
with gr.Column(scale=1):
|
| 174 |
+
output_images = gr.Gallery(label="Generated Images", type="pil")
|
| 175 |
used_seed = gr.Number(label="Seed Used", interactive=False)
|
| 176 |
debug_log = gr.Textbox(label="Debug Log Output", lines=25, interactive=False)
|
| 177 |
|