Alexander Bagus committed on
Commit
3fd5bc5
·
1 Parent(s): 67a1d0e
Files changed (3) hide show
  1. README.md +2 -0
  2. app.py +20 -10
  3. utils/prompt_utils.py +0 -1
README.md CHANGED
@@ -12,6 +12,8 @@ short_description: Supports Canny, HED, Depth, Pose and MLSD
12
  models:
13
  - Tongyi-MAI/Z-Image-Turbo
14
  - alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union
 
 
15
  ---
16
 
17
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
12
  models:
13
  - Tongyi-MAI/Z-Image-Turbo
14
  - alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union
15
+ commands:
16
+ - echo "Starting Space..."
17
  ---
18
 
19
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,10 +1,6 @@
1
  import gradio as gr
2
  import numpy as np
3
- import random
4
- import json
5
- import spaces
6
- import torch
7
- from diffusers import DiffusionPipeline
8
  from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
9
  from videox_fun.pipeline import ZImageControlPipeline
10
  from videox_fun.models import ZImageControlTransformer2DModel
@@ -12,6 +8,7 @@ from transformers import AutoTokenizer, Qwen3ForCausalLM
12
  from diffusers import AutoencoderKL
13
  from utils.image_utils import get_image_latent, scale_image
14
  from utils.prompt_utils import polish_prompt
 
15
  # from videox_fun.utils.utils import get_image_latent
16
 
17
 
@@ -61,7 +58,7 @@ tokenizer = AutoTokenizer.from_pretrained(
61
  )
62
  text_encoder = Qwen3ForCausalLM.from_pretrained(
63
  MODEL_LOCAL, subfolder="text_encoder", torch_dtype=weight_dtype,
64
- low_cpu_mem_usage=True,
65
  )
66
  scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3)
67
  pipe = ZImageControlPipeline(
@@ -79,9 +76,22 @@ pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
79
  spaces.aoti_blocks_load(pipe.transformer.layers,
80
  "zerogpu-aoti/Z-Image", variant="fa3")
81
 
82
- def prepare(prompt, input_image):
83
  polished_prompt = polish_prompt(prompt)
84
- return polished_prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  @spaces.GPU
87
  def inference(
@@ -141,7 +151,7 @@ with open('static/data.json', 'r') as file:
141
  data = json.load(file)
142
  examples = data['examples']
143
 
144
- with gr.Blocks() as demo:
145
  with gr.Column(elem_id="col-container"):
146
  with gr.Column():
147
  gr.HTML(read_file("static/header.html"))
@@ -244,4 +254,4 @@ with gr.Blocks() as demo:
244
  # )
245
 
246
  if __name__ == "__main__":
247
- demo.launch(mcp_server=True, css=css)
 
1
  import gradio as gr
2
  import numpy as np
3
+ import random, json, spaces, torch
 
 
 
 
4
  from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
5
  from videox_fun.pipeline import ZImageControlPipeline
6
  from videox_fun.models import ZImageControlTransformer2DModel
 
8
  from diffusers import AutoencoderKL
9
  from utils.image_utils import get_image_latent, scale_image
10
  from utils.prompt_utils import polish_prompt
11
+ from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector, CannyDetector, MidasDetector
12
  # from videox_fun.utils.utils import get_image_latent
13
 
14
 
 
58
  )
59
  text_encoder = Qwen3ForCausalLM.from_pretrained(
60
  MODEL_LOCAL, subfolder="text_encoder", torch_dtype=weight_dtype,
61
+ low_cpu_mem_usage=False,
62
  )
63
  scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3)
64
  pipe = ZImageControlPipeline(
 
76
  spaces.aoti_blocks_load(pipe.transformer.layers,
77
  "zerogpu-aoti/Z-Image", variant="fa3")
78
 
79
def prepare(prompt, input_image, control_mode='Canny'):
    """Polish the prompt and build the control image for the chosen mode.

    Args:
        prompt: Raw user prompt; forwarded to ``polish_prompt``.
        input_image: Source image handed to the selected annotator.
        control_mode: 'HED', 'Midas', 'MLSD' or 'Pose' select the matching
            ``controlnet_aux`` detector; any other value (including the
            default 'Canny') falls back to ``CannyDetector``.

    Returns:
        A ``(polished_prompt, control_image)`` tuple.
    """
    polished_prompt = polish_prompt(prompt)

    # This has to be ONE if/elif chain. With independent `if` statements the
    # trailing `else` pairs only with the 'Pose' test, so HED/Midas/MLSD
    # detectors were loaded and then immediately replaced by Canny.
    if control_mode == 'HED':
        processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
    elif control_mode == 'Midas':
        processor = MidasDetector.from_pretrained("lllyasviel/Annotators")
    elif control_mode == 'MLSD':
        processor = MLSDdetector.from_pretrained("lllyasviel/Annotators")
    elif control_mode == 'Pose':
        processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
    else:
        # Default / unrecognised mode: Canny needs no pretrained weights.
        processor = CannyDetector()
    control_image = processor(input_image)

    return polished_prompt, control_image
95
 
96
  @spaces.GPU
97
  def inference(
 
151
  data = json.load(file)
152
  examples = data['examples']
153
 
154
+ with gr.Blocks(css=css) as demo:
155
  with gr.Column(elem_id="col-container"):
156
  with gr.Column():
157
  gr.HTML(read_file("static/header.html"))
 
254
  # )
255
 
256
  if __name__ == "__main__":
257
+ demo.launch(mcp_server=True)
utils/prompt_utils.py CHANGED
@@ -16,7 +16,6 @@ def polish_prompt(original_prompt):
16
  # messages = []
17
  client = InferenceClient()
18
 
19
-
20
  try:
21
  completion = client.chat.completions.create(
22
  rovider="cerebras",
 
16
  # messages = []
17
  client = InferenceClient()
18
 
 
19
  try:
20
  completion = client.chat.completions.create(
21
  rovider="cerebras",