# Z-Image-Turbo / app.py
# Last commit 47e50c0 (tchung1970): Add Korean localization and CLAUDE.md documentation
import spaces
from dataclasses import dataclass
import json
import logging
import os
import random
import re
import sys
import warnings
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from diffusers import ZImagePipeline
from diffusers.models.transformers.transformer_z_image import ZImageTransformer2DModel
from pe import prompt_template
# ==================== Environment Variables ==================================
def _env_flag(name, default="true"):
    """Return True when environment variable *name* (or *default*) lower-cases to "true"."""
    return os.environ.get(name, default).lower() == "true"


MODEL_PATH = os.environ.get("MODEL_PATH", "Tongyi-MAI/Z-Image-Turbo")
ENABLE_COMPILE = _env_flag("ENABLE_COMPILE")
ENABLE_WARMUP = _env_flag("ENABLE_WARMUP")
ATTENTION_BACKEND = os.environ.get("ATTENTION_BACKEND", "flash_3")
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")
# =============================================================================

# Turn off tokenizers parallelism, suppress every Python warning, and limit the
# "transformers" logger to errors.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings("ignore")
logging.getLogger("transformers").setLevel(logging.ERROR)
# Resolution labels offered in the UI, grouped by size category.
# Each label has the form "WIDTHxHEIGHT ( RATIO )".
RES_CHOICES = {
    "1024": [
        "1024x1024 ( 1:1 )",
        "1152x896 ( 9:7 )",
        "896x1152 ( 7:9 )",
        "1152x864 ( 4:3 )",
        "864x1152 ( 3:4 )",
        "1248x832 ( 3:2 )",
        "832x1248 ( 2:3 )",
        "1280x720 ( 16:9 )",
        "720x1280 ( 9:16 )",
        "1344x576 ( 21:9 )",
        "576x1344 ( 9:21 )",
    ],
    "1280": [
        "1280x1280 ( 1:1 )",
        "1440x1120 ( 9:7 )",
        "1120x1440 ( 7:9 )",
        "1472x1104 ( 4:3 )",
        "1104x1472 ( 3:4 )",
        "1536x1024 ( 3:2 )",
        "1024x1536 ( 2:3 )",
        "1600x896 ( 16:9 )",
        "896x1600 ( 9:16 )",  # 896 rather than 900: the dimension has to be divisible by 16
        "1680x720 ( 21:9 )",
        "720x1680 ( 9:21 )",
    ],
}

# Every label from every category, in category order.
RESOLUTION_SET = [label for group in RES_CHOICES.values() for label in group]
EXAMPLE_PROMPTS = [
["ํ•œ ๋‚จ์„ฑ๊ณผ ๊ทธ์˜ ํ‘ธ๋“ค์ด ์–ด์šธ๋ฆฌ๋Š” ์˜์ƒ์„ ์ž…๊ณ  ์‹ค๋‚ด ์กฐ๋ช… ์•„๋ž˜ ๊ด€๊ฐ๋“ค์ด ์žˆ๋Š” ๋ฐฐ๊ฒฝ์—์„œ ๊ฐœ ์‡ผ์— ์ฐธ๊ฐ€ํ•˜๊ณ  ์žˆ๋Š” ๋ชจ์Šต."],
[
"๋ถ„์œ„๊ธฐ ์žˆ๋Š” ์–ด๋‘์šด ํ†ค์˜ ์ธ๋ฌผ ์‚ฌ์ง„, ์šฐ์•„ํ•œ ์ค‘๊ตญ ์—ฌ์„ฑ์ด ์–ด๋‘์šด ๋ฐฉ์— ์žˆ๋‹ค. ๊ฐ•ํ•œ ๋น›์ด ์…”ํ„ฐ๋ฅผ ํ†ต๊ณผํ•ด ๊ทธ๋…€์˜ ์–ผ๊ต—์— ๋ฒˆ๊ฐœ ๋ชจ์–‘์˜ ์„ ๋ช…ํ•œ ๋น›๊ณผ ๊ทธ๋ฆผ์ž๋ฅผ ํˆฌ์‚ฌํ•˜๋ฉฐ ํ•œ์ชฝ ๋ˆˆ๋งŒ์„ ์ •ํ™•ํžˆ ๋น„์ถ˜๋‹ค. ๋†’์€ ๋Œ€๋น„, ๋ช…์•” ๊ฒฝ๊ณ„๊ฐ€ ์„ ๋ช…ํ•˜๋ฉฐ, ์‹ ๋น„๋กœ์šด ๋А๋‚Œ, ๋ผ์ด์นด ์นด๋ฉ”๋ผ ์ƒ‰์กฐ."
],
[
"๋ฐ๊ฒŒ ์กฐ๋ช…๋œ ์—˜๋ฆฌ๋ฒ ์ดํ„ฐ ์•ˆ์—์„œ ๊ธด ๊ฒ€์€ ๋จธ๋ฆฌ๋ฅผ ํ•œ ์ Š์€ ๋™์•„์‹œ์•„ ์—ฌ์„ฑ์ด ๊ฑฐ์šธ์„ ํ–ฅํ•ด ์…€์นด๋ฅผ ์ฐ๋Š” ์ค‘๊ฐ„ ๊ฑฐ๋ฆฌ ์Šค๋งˆํŠธํฐ ์…€์นด ์‚ฌ์ง„. ๊ทธ๋…€๋Š” ํฐ์ƒ‰ ๊ฝƒ๋ฌด๋Šฌ๊ฐ€ ์žˆ๋Š” ๊ฒ€์€์ƒ‰ ์˜คํ”„์ˆ„๋” ํฌ๋กญํƒ‘๊ณผ ์–ด๋‘์šด ์ฒญ๋ฐ”์ง€๋ฅผ ์ž…๊ณ  ์žˆ๋‹ค. ๋จธ๋ฆฌ๋ฅผ ์•ฝ๊ฐ„ ๊ธฐ์šธ์ด๊ณ  ์ž…์ˆ ์„ ๋พฐ์กฑํ•˜๊ฒŒ ๋‚ด๋ฐ€์–ด ํ‚ค์Šคํ•˜๋Š” ๋“ฏํ•œ ํฌ์ฆˆ๋กœ ๋งค์šฐ ๊ท€์—ฝ๊ณ  ์žฅ๋‚œ์Šค๋Ÿฌ์šด ๋ชจ์Šต์ด๋‹ค. ์˜ค๋ฅธ์†์— ์ง™์€ ํšŒ์ƒ‰ ์Šค๋งˆํŠธํฐ์„ ๋“ค๊ณ  ์–ผ๊ตด ์ผ๋ถ€๋ฅผ ๊ฐ€๋ฆฌ๊ณ  ์žˆ์œผ๋ฉฐ, ํ›„๋ฉด ์นด๋ฉ”๋ผ ๋ Œ์ฆˆ๊ฐ€ ๊ฑฐ์šธ์„ ํ–ฅํ•˜๊ณ  ์žˆ๋‹ค."
],
[
"๋นจ๊ฐ„ ํ•œํ‘ธ๋ฅผ ์ž…์€ ์ Š์€ ์ค‘๊ตญ ์—ฌ์„ฑ, ์ •๊ตํ•œ ์ž์ˆ˜. ์™„๋ฒฝํ•œ ๋ฉ”์ดํฌ์—…, ๋ถ‰์€ ๊ฝƒ๋ฌด๋Šฌ ์ด๋งˆ ์žฅ์‹. ์ •๊ตํ•œ ๋†’์€ ์ชฝ์ง„ ๋จธ๋ฆฌ, ๊ธˆ๋น› ๋ด‰ํ™ฉ ๋จธ๋ฆฌ ์žฅ์‹, ๋ถ‰์€ ๊ฝƒ, ๊ตฌ์Šฌ. ์—ฌ์ธ๊ณผ ๋‚˜๋ฌด, ์ƒˆ๊ฐ€ ๊ทธ๋ ค์ง„ ๋‘ฅ๊ทผ ์ ‘์ด์‹ ๋ถ€์ฑ„๋ฅผ ๋“ค๊ณ  ์žˆ๋‹ค. ๋„ค์˜จ ๋ฒˆ๊ฐœ ๋ชจ์–‘ ๋žจํ”„ (โšก๏ธ), ๋ฐ์€ ๋…ธ๋ž€์ƒ‰ ๋น›, ํŽผ์นœ ์™ผ์ชฝ ์†๋ฐ”๋‹ฅ ์œ„์—. ๋ถ€๋“œ๋Ÿฝ๊ฒŒ ์กฐ๋ช…๋œ ์•ผ์™ธ ๋ฐค ๋ฐฐ๊ฒฝ, ์‹ค๋ฃจ์—ฃ์˜ ๋‹ค์ธต ํƒ‘(์„œ์•ˆ ๋Œ€์•ˆํƒ‘), ํ๋ฆฟํ•œ ์ปฌ๋Ÿฌ ๋จผ ๋ถˆ๋น›๋“ค."
],
[
'''๊ณ ์š”ํ•˜๊ณ  ์žฅ์—„ํ•œ ์ค‘๊ตญ ํ’๊ฒฝ์„ ๋ฌ˜์‚ฌํ•œ ์„ธ๋กœ ํ˜•์‹์˜ ๋””์ง€ํ„ธ ์ผ๋Ÿฌ์ŠคํŠธ๋ ˆ์ด์…˜์œผ๋กœ, ์ „ํ†ต์ ์ธ ์‚ฐ์ˆ˜ํ™” ์Šคํƒ€์ผ์„ ํ˜„๋Œ€์ ์ด๊ณ  ๊น”๋”ํ•œ ๋ฏธํ•™์œผ๋กœ ์žฌํ•ด์„ํ–ˆ๋‹ค. ์žฅ๋ฉด์€ ์ค‘์•™ ๊ณ„๊ณก์„ ๋‘˜๋Ÿฌ์‹ผ ๋‹ค์–‘ํ•œ ํŒŒ๋ž€์ƒ‰๊ณผ ์ฒญ๋ก์ƒ‰ ์Œ์˜์˜ ์šฐ๋š ์†Ÿ์€ ๊ฐ€ํŒŒ๋ฅธ ์ ˆ๋ฒฝ์ด ์ง€๋ฐฐํ•œ๋‹ค. ๋ฉ€๋ฆฌ ์‚ฐ๋“ค์ด ์ธต์ธต์ด ์—ฐํ•œ ํŒŒ๋ž€์ƒ‰๊ณผ ํฐ์ƒ‰ ์•ˆ๊ฐœ ์†์œผ๋กœ ์‚ฌ๋ผ์ง€๋ฉฐ ๊ฐ•ํ•œ ๋Œ€๊ธฐ ์›๊ทผ๊ฐ๊ณผ ๊นŠ์ด๋ฅผ ๋งŒ๋“ค์–ด๋‚ธ๋‹ค. ๊ณ ์š”ํ•œ ์ฒญ๋ก์ƒ‰ ๊ฐ•์ด ๊ตฌ์„ฑ์˜ ์ค‘์•™์„ ๊ฐ€๋กœ์งˆ๋Ÿฌ ํ๋ฅด๋ฉฐ, ์ž‘์€ ์ „ํ†ต ์ค‘๊ตญ ๋ฐฐ, ์•„๋งˆ๋„ ์‚ผํŒ์ด ๋ฌผ ์œ„๋ฅผ ํ•ญํ•ดํ•˜๊ณ  ์žˆ๋‹ค. ๋ฐฐ๋Š” ๋ฐ์€ ๋…ธ๋ž€์ƒ‰ ์ฒœ๋ง‰๊ณผ ๋ถ‰์€ ์„ ์ฒด๋ฅผ ๊ฐ€์ง€๊ณ  ์žˆ์œผ๋ฉฐ ๋’ค์— ๋ถ€๋“œ๋Ÿฌ์šด ๋ฌผ๊ฒฐ์„ ๋‚จ๊ธด๋‹ค. ์—ฌ๋Ÿฌ ๋ช…์˜ ํฌ๋ฏธํ•œ ์ธ๋ฌผ๋“ค์„ ํƒœ์šฐ๊ณ  ์žˆ๋‹ค. ๋…น์ƒ‰ ๋‚˜๋ฌด์™€ ์ผ๋ถ€ ๋งจ๊ฐ€์ง€ ๋‚˜๋ฌด๋ฅผ ํฌํ•จํ•œ ๋“œ๋ฌธ๋“œ๋ฌธํ•œ ์‹์ƒ์ด ๋ฐ”์œ„ ์„ ๋ฐ˜๊ณผ ๋ด‰์šฐ๋ฆฌ์— ๋ถ™์–ด ์žˆ๋‹ค. ์ „์ฒด ์กฐ๋ช…์€ ๋ถ€๋“œ๋Ÿฝ๊ณ  ํ™•์‚ฐ๋˜์–ด ์ „์ฒด ์žฅ๋ฉด์— ํ‰์˜จํ•œ ๋น›์„ ๋“œ๋ฆฌ์šด๋‹ค. ์ด๋ฏธ์ง€ ์ค‘์•™์— ํ…์ŠคํŠธ๊ฐ€ ๊ฒน์ณ์ ธ ์žˆ๋‹ค. ํ…์ŠคํŠธ ๋ธ”๋ก ์ƒ๋‹จ์—๋Š” ์–‘์‹ํ™”๋œ ๋ฌธ์ž๊ฐ€ ํฌํ•จ๋œ ์ž‘๊ณ  ๋นจ๊ฐ„์ƒ‰์˜ ์›ํ˜• ๋„์žฅ ๊ฐ™์€ ๋กœ๊ณ ๊ฐ€ ์žˆ๋‹ค. ๊ทธ ์•„๋ž˜ ์ž‘์€ ๊ฒ€์€์ƒ‰ ์‚ฐ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ 'Zao-Xiang * East Beauty & West Fashion * Z-Image'๋ผ๋Š” ๋‹จ์–ด๊ฐ€ ์žˆ๋‹ค. ๊ทธ ๋ฐ”๋กœ ์•„๋ž˜ ๋” ํฐ ์šฐ์•„ํ•œ ๊ฒ€์€์ƒ‰ ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ 'SHOW & SHARE CREATIVITY WITH THE WORLD'๋ผ๋Š” ๋‹จ์–ด๊ฐ€ ์žˆ๋‹ค. ๊ทธ ์ค‘์—๋Š” "SHOW & SHARE", "CREATIVITY", "WITH THE WORLD"๊ฐ€ ์žˆ๋‹ค.'''
],
[
"""๊ฐ€์ƒ์˜ ์˜ํ™” ใ€ŠํšŒ์ƒ์˜ ๋ง›ใ€‹(The Taste of Memory)์˜ ์˜ํ™” ํฌ์Šคํ„ฐ. ์žฅ๋ฉด์€ ์†Œ๋ฐ•ํ•œ 19์„ธ๊ธฐ ์Šคํƒ€์ผ ์ฃผ๋ฐฉ์— ์„ค์ •๋˜์–ด ์žˆ๋‹ค. ํ™”๋ฉด ์ค‘์•™์— ์ ๊ฐˆ์ƒ‰ ๋จธ๋ฆฌ์™€ ์ž‘์€ ์ฝง์ˆ˜์—ผ์„ ๊ฐ€์ง„ ์ค‘๋…„ ๋‚จ์„ฑ(๋ฐฐ์šฐ ์•„์„œ ํŽœํ• ๋ฆฌ๊ฑด ์—ฐ๊ธฐ)์ด ๋‚˜๋ฌด ํ…Œ์ด๋ธ” ๋’ค์— ์„œ ์žˆ์œผ๋ฉฐ, ํฐ์ƒ‰ ์…”์ธ , ๊ฒ€์€์ƒ‰ ์กฐ๋ผ, ๋ฒ ์ด์ง€์ƒ‰ ์•ž์น˜๋งˆ๋ฅผ ์ž…๊ณ  ์žˆ๊ณ  ํ•œ ์—ฌ์„ฑ์„ ๋ฐ”๋ผ๋ณด๋ฉฐ ์†์— ํฐ ๋ฉ์–ด๋ฆฌ์˜ ์ƒ๊ณ ๊ธฐ๋ฅผ ๋“ค๊ณ  ์žˆ์œผ๋ฉฐ ์•„๋ž˜์—๋Š” ๋‚˜๋ฌด ๋„๋งˆ๊ฐ€ ์žˆ๋‹ค. ๊ทธ์˜ ์˜ค๋ฅธ์ชฝ์—๋Š” ๋†’์€ ์ชฝ์ง„ ๋จธ๋ฆฌ๋ฅผ ํ•œ ๊ฒ€์€ ๋จธ๋ฆฌ ์—ฌ์„ฑ(๋ฐฐ์šฐ ์—˜๋ฆฌ๋„ˆ ๋ฐด์Šค ์—ฐ๊ธฐ)์ด ํ…Œ์ด๋ธ”์— ๊ธฐ๋Œ€์–ด ๊ทธ์—๊ฒŒ ๋ถ€๋“œ๋Ÿฝ๊ฒŒ ๋ฏธ์†Œ์ง“๊ณ  ์žˆ๋‹ค. ๊ทธ๋…€๋Š” ์—ฐํ•œ ์ƒ‰ ์…”์ธ ์™€ ์ƒ๋‹จ์€ ํฐ์ƒ‰, ํ•˜๋‹จ์€ ํŒŒ๋ž€์ƒ‰์ธ ๊ธด ์น˜๋งˆ๋ฅผ ์ž…๊ณ  ์žˆ๋‹ค. ํ…Œ์ด๋ธ” ์œ„์—๋Š” ๋‹ค์ง„ ํŒŒ์™€ ์–‘๋ฐฐ์ถ” ์ฑ„๊ฐ€ ์žˆ๋Š” ๋„๋งˆ ์™ธ์—๋„ ํฐ์ƒ‰ ๋„์ž๊ธฐ ์ ‘์‹œ, ์‹ ์„ ํ•œ ํ—ˆ๋ธŒ๊ฐ€ ์žˆ๊ณ , ์™ผ์ชฝ ๋‚˜๋ฌด ์ƒ์ž ์œ„์—๋Š” ์ง™์€ ์ƒ‰ ํฌ๋„ ํ•œ ์†ก์ด๊ฐ€ ๋†“์—ฌ ์žˆ๋‹ค. ๋ฐฐ๊ฒฝ์€ ๊ฑฐ์น ๊ฒŒ ํšŒ๋ฐฑ์ƒ‰์œผ๋กœ ๋ฏธ์žฅ๋œ ๋ฒฝ์ด๋ฉฐ ํ’๊ฒฝํ™” ํ•œ ์ ์ด ๊ฑธ๋ ค ์žˆ๋‹ค. ๊ฐ€์žฅ ์˜ค๋ฅธ์ชฝ ์ž‘์—…๋Œ€ ์œ„์—๋Š” ๋ณต๊ณ ํ’ ์˜ค์ผ ๋žจํ”„๊ฐ€ ๋†“์—ฌ ์žˆ๋‹ค. ํฌ์Šคํ„ฐ์—๋Š” ๋งŽ์€ ํ…์ŠคํŠธ ์ •๋ณด๊ฐ€ ์žˆ๋‹ค. ์™ผ์ชฝ ์ƒ๋‹จ์—๋Š” ํฐ์ƒ‰ ์‚ฐ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ "ARTISAN FILMS PRESENTS"๊ฐ€ ์žˆ๊ณ  ๊ทธ ์•„๋ž˜์— "ELEANOR VANCE"์™€ "ACADEMY AWARDยฎ WINNER"๊ฐ€ ์žˆ๋‹ค. ์˜ค๋ฅธ์ชฝ ์ƒ๋‹จ์—๋Š” "ARTHUR PENHALIGON"๊ณผ "GOLDEN GLOBEยฎ AWARD WINNER"๊ฐ€ ์“ฐ์—ฌ ์žˆ๋‹ค. ์ƒ๋‹จ ์ค‘์•™์—๋Š” ์„ ๋Œ„์Šค ์˜ํ™”์ œ ์›”๊ณ„๊ด€ ๋กœ๊ณ ๊ฐ€ ์žˆ๊ณ  ์•„๋ž˜์— "SUNDANCE FILM FESTIVAL GRAND JURY PRIZE 2024"๊ฐ€ ์“ฐ์—ฌ ์žˆ๋‹ค. ์ฃผ์š” ์ œ๋ชฉ "THE TASTE OF MEMORY"๋Š” ํฐ์ƒ‰์˜ ํฐ ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ ํ•˜๋‹จ์— ๋ˆˆ์— ๋„๊ฒŒ ํ‘œ์‹œ๋˜์–ด ์žˆ๋‹ค. ์ œ๋ชฉ ์•„๋ž˜์—๋Š” "A FILM BY Tongyi Interaction Lab"์ด ๋ช…์‹œ๋˜์–ด ์žˆ๋‹ค. 
ํ•˜๋‹จ ์˜์—ญ์—๋Š” ํฐ์ƒ‰ ์ž‘์€ ๊ธ€์”จ๋กœ "SCREENPLAY BY ANNA REID", "CULINARY DIRECTION BY JAMES CARTER" ๋ฐ Artisan Films, Riverstone Pictures, Heritage Media ๋“ฑ ์ˆ˜๋งŽ์€ ์ œ์ž‘์‚ฌ ๋กœ๊ณ ๋ฅผ ํฌํ•จํ•œ ์ „์ฒด ์ถœ์—ฐ์ง„ ๋ฐ ์ œ์ž‘์ง„ ๋ช…๋‹จ์ด ๋‚˜์—ด๋˜์–ด ์žˆ๋‹ค. ์ „์ฒด์ ์ธ ์Šคํƒ€์ผ์€ ์‚ฌ์‹ค์ฃผ์˜๋กœ ๋”ฐ๋œปํ•˜๊ณ  ๋ถ€๋“œ๋Ÿฌ์šด ์กฐ๋ช… ๋ฐฉ์‹์„ ์ฑ„ํƒํ•˜์—ฌ ์นœ๋ฐ€ํ•œ ๋ถ„์œ„๊ธฐ๋ฅผ ์กฐ์„ฑํ•œ๋‹ค. ์ƒ‰์กฐ๋Š” ๊ฐˆ์ƒ‰, ๋ฒ ์ด์ง€, ๋ถ€๋“œ๋Ÿฌ์šด ๋…น์ƒ‰ ๋“ฑ ๋Œ€์ง€์ƒ‰ ํ†ค์ด ์ฃผ๋ฅผ ์ด๋ฃฌ๋‹ค. ๋‘ ๋ฐฐ์šฐ์˜ ๋ชธ์€ ๋ชจ๋‘ ํ—ˆ๋ฆฌ์—์„œ ์ž˜๋ ค ์žˆ๋‹ค."""
],
[
"""์ •์‚ฌ๊ฐํ˜• ๊ตฌ๋„์˜ ํด๋กœ์ฆˆ์—… ์‚ฌ์ง„์œผ๋กœ, ๊ฑฐ๋Œ€ํ•˜๊ณ  ์„ ๋ช…ํ•œ ๋…น์ƒ‰ ์‹๋ฌผ ์žŽ์ด ์ฃผ์ œ์ด๋ฉฐ ํ…์ŠคํŠธ๊ฐ€ ๊ฒน์ณ์ ธ ํฌ์Šคํ„ฐ๋‚˜ ์žก์ง€ ํ‘œ์ง€ ๊ฐ™์€ ์™ธ๊ด€์„ ๊ฐ–์ถ”๊ณ  ์žˆ๋‹ค. ์ฃผ์š” ํ”ผ์‚ฌ์ฒด๋Š” ์™ผ์ชฝ ํ•˜๋‹จ์—์„œ ์˜ค๋ฅธ์ชฝ ์ƒ๋‹จ์œผ๋กœ ๋Œ€๊ฐ์„ ์œผ๋กœ ๊ตฌ๋ถ€๋Ÿฌ์ ธ ํ”„๋ ˆ์ž„์„ ๊ฐ€๋กœ์ง€๋ฅด๋Š” ๋‘๊ป๊ณ  ์™์Šค ๊ฐ™์€ ์งˆ๊ฐ์˜ ์žŽ์ด๋‹ค. ํ‘œ๋ฉด์ด ๋งค์šฐ ๋ฐ˜์‚ฌ์ ์ด์–ด์„œ ๋ฐ์€ ์ง์‚ฌ๊ด‘์›์„ ํฌ์ฐฉํ•˜์—ฌ ๋‘๋“œ๋Ÿฌ์ง„ ํ•˜์ด๋ผ์ดํŠธ๋ฅผ ํ˜•์„ฑํ•˜๊ณ  ๋ฐ์€ ๋ฉด ์•„๋ž˜ ํ‰ํ–‰ํ•œ ๋ฏธ์„ธ ์žŽ๋งฅ์ด ๋“œ๋Ÿฌ๋‚œ๋‹ค. ๋ฐฐ๊ฒฝ์€ ๋‹ค๋ฅธ ์ง™์€ ๋…น์ƒ‰ ์žŽ๋“ค๋กœ ๊ตฌ์„ฑ๋˜์–ด ์žˆ์œผ๋ฉฐ ์•ฝ๊ฐ„ ์ดˆ์ ์ด ํ๋ ค์ ธ ์–•์€ ํ”ผ์‚ฌ๊ณ„ ์‹ฌ๋„ ํšจ๊ณผ๋ฅผ ๋งŒ๋“ค์–ด ์ „๊ฒฝ์˜ ์ฃผ์š” ์žŽ์„ ๊ฐ•์กฐํ•œ๋‹ค. ์ „์ฒด์ ์ธ ์Šคํƒ€์ผ์€ ์‚ฌ์‹ค์ ์ธ ์‚ฌ์ง„์œผ๋กœ ๋ฐ์€ ์žŽ๊ณผ ์–ด๋‘์šด ๊ทธ๋ฆผ์ž ๋ฐฐ๊ฒฝ ์‚ฌ์ด์— ๋†’์€ ๋Œ€๋น„๋ฅผ ํ˜•์„ฑํ•œ๋‹ค. ์ด๋ฏธ์ง€์—๋Š” ์—ฌ๋Ÿฌ ๋ Œ๋”๋ง๋œ ํ…์ŠคํŠธ๊ฐ€ ์žˆ๋‹ค. ์™ผ์ชฝ ์ƒ๋‹จ์—๋Š” ํฐ์ƒ‰ ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ "PIXEL-PEEPERS GUILD Presents"๋ผ๋Š” ํ…์ŠคํŠธ๊ฐ€ ์žˆ๋‹ค. ์˜ค๋ฅธ์ชฝ ์ƒ๋‹จ์—๋„ ํฐ์ƒ‰ ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ "[Instant Noodle] ๆณก้ข่ฐƒๆ–™ๅŒ…"๋ผ๋Š” ํ…์ŠคํŠธ๊ฐ€ ์žˆ๋‹ค. ์™ผ์ชฝ์—๋Š” ์ˆ˜์ง์œผ๋กœ ๋ฐฐ์—ด๋œ ์ œ๋ชฉ "Render Distance: Max"๊ฐ€ ํฐ์ƒ‰ ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ ๋˜์–ด ์žˆ๋‹ค. ์™ผ์ชฝ ํ•˜๋‹จ์—๋Š” ๋‹ค์„ฏ ๊ฐœ์˜ ๊ฑฐ๋Œ€ํ•œ ํฐ์ƒ‰ ์†ก์ฒด ํ•œ์ž "ๆ˜พๅกๅœจ...็‡ƒ็ƒง"๊ฐ€ ์žˆ๋‹ค. ์˜ค๋ฅธ์ชฝ ํ•˜๋‹จ์—๋Š” ์ž‘์€ ํฐ์ƒ‰ ์„ธ๋ฆฌํ”„ ๊ธ€๊ผด๋กœ "Leica Glowโ„ข Unobtanium X-1"์ด ์žˆ๊ณ , ๊ทธ ๋ฐ”๋กœ ์œ„์—๋Š” ํฐ์ƒ‰ ์†ก์ฒด๋กœ ์“ฐ์ธ ์ด๋ฆ„ "่”กๅ‡ "๊ฐ€ ์žˆ๋‹ค. ์‹๋ณ„๋œ ํ•ต์‹ฌ ๊ฐœ์ฒด์—๋Š” ๋ธŒ๋žœ๋“œ ํ”ฝ์…€ ํ”ผํผ์Šค ๊ธธ๋“œ, ์ œํ’ˆ ๋ผ์ธ ์ธ์Šคํ„ดํŠธ ๋ˆ„๋“ค ์กฐ๋ฏธ๋ฃŒ ํŒจํ‚ค์ง€, ์นด๋ฉ”๋ผ ๋ชจ๋ธ Unobtaniumโ„ข X-1 ๋ฐ ์‚ฌ์ง„๊ฐ€ ์ด๋ฆ„ Zao-Xiang์ด ํฌํ•จ๋œ๋‹ค."""
],
]
def get_resolution(resolution):
    """Parse a "WIDTHxHEIGHT" resolution label into integer dimensions.

    The first "<digits> <sep> <digits>" pair found anywhere in *resolution* is
    used, so labels with a trailing aspect ratio such as "1152x896 ( 9:7 )"
    are accepted. The separator is an ASCII "x" or the multiplication sign
    (U+00D7), optionally surrounded by whitespace.

    Args:
        resolution (str): Resolution label to parse.

    Returns:
        tuple[int, int]: (width, height); (1024, 1024) when no pair is found.
    """
    # The multiplication sign is written as an escape so the pattern cannot be
    # corrupted by a mis-decoded source file (it previously had been, which made
    # "1024×768"-style input fall through to the default).
    match = re.search(r"(\d+)\s*[\u00d7x]\s*(\d+)", resolution)
    if match is None:
        return 1024, 1024
    width, height = match.groups()
    return int(width), int(height)
def load_models(model_path, enable_compile=False, attention_backend="native"):
    """Build a ZImagePipeline on CUDA in bfloat16 from its individual components.

    When *model_path* exists on disk, each component is read from the matching
    sub-directory ("vae", "text_encoder", "tokenizer", "transformer"). Otherwise
    *model_path* is handed to ``from_pretrained`` unchanged together with
    ``subfolder=<component>`` and ``use_auth_token`` (HF_TOKEN when set, else True).

    Args:
        model_path: Local directory or identifier understood by ``from_pretrained``.
        enable_compile (bool): Apply the inductor settings below, disable VAE
            tiling and wrap the transformer in ``torch.compile``.
        attention_backend (str): Value passed to the transformer's
            ``set_attention_backend``.

    Returns:
        The assembled pipeline (its scheduler is left as None).
    """
    print(f"Loading models from {model_path}...")

    use_auth_token = HF_TOKEN or True

    def _source(component):
        """Return (location, extra from_pretrained kwargs) for one component."""
        if os.path.exists(model_path):
            return os.path.join(model_path, component), {}
        return f"{model_path}", {"subfolder": component, "use_auth_token": use_auth_token}

    cuda_bf16 = {"torch_dtype": torch.bfloat16, "device_map": "cuda"}

    vae_location, vae_extra = _source("vae")
    vae = AutoencoderKL.from_pretrained(vae_location, **cuda_bf16, **vae_extra)

    encoder_location, encoder_extra = _source("text_encoder")
    text_encoder = AutoModel.from_pretrained(encoder_location, **cuda_bf16, **encoder_extra).eval()

    tokenizer_location, tokenizer_extra = _source("tokenizer")
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_location, **tokenizer_extra)
    tokenizer.padding_side = "left"

    if enable_compile:
        print("Enabling torch.compile optimizations...")
        inductor = torch._inductor.config
        inductor.conv_1x1_as_mm = True
        inductor.coordinate_descent_tuning = True
        inductor.epilogue_fusion = False
        inductor.coordinate_descent_check_all_directions = True
        inductor.max_autotune_gemm = True
        inductor.max_autotune_gemm_backends = "TRITON,ATEN"
        inductor.triton.cudagraphs = False

    # The pipeline is created without a transformer; it is attached below once loaded.
    pipe = ZImagePipeline(
        scheduler=None,
        vae=vae,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        transformer=None,
    )
    if enable_compile:
        pipe.vae.disable_tiling()

    transformer_location, transformer_extra = _source("transformer")
    transformer = ZImageTransformer2DModel.from_pretrained(transformer_location, **transformer_extra)
    pipe.transformer = transformer.to("cuda", torch.bfloat16)
    pipe.transformer.set_attention_backend(attention_backend)

    if enable_compile:
        print("Compiling transformer...")
        pipe.transformer = torch.compile(
            pipe.transformer,
            mode="max-autotune-no-cudagraphs",
            fullgraph=False,
        )

    pipe.to("cuda", torch.bfloat16)
    return pipe
def generate_image(
    pipe,
    prompt,
    resolution="1024x1024",
    seed=42,
    guidance_scale=5.0,
    num_inference_steps=50,
    shift=3.0,
    max_sequence_length=512,
    progress=gr.Progress(track_tqdm=True),
):
    """Run *pipe* once and return the first generated image.

    Args:
        pipe: Pipeline object; its ``scheduler`` attribute is replaced on every call.
        prompt: Text prompt forwarded to the pipeline.
        resolution (str): Label parsed by ``get_resolution`` into width and height.
        seed (int): Seed for the CUDA random generator.
        guidance_scale (float): Forwarded to the pipeline.
        num_inference_steps (int): Forwarded to the pipeline.
        shift (float): ``shift`` of the FlowMatchEulerDiscreteScheduler built for this call.
        max_sequence_length (int): Forwarded to the pipeline.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    width, height = get_resolution(resolution)

    # A new scheduler per call lets every request choose its own shift.
    pipe.scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=shift)

    call_args = {
        "prompt": prompt,
        "height": height,
        "width": width,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "generator": torch.Generator("cuda").manual_seed(seed),
        "max_sequence_length": max_sequence_length,
    }
    result = pipe(**call_args)
    return result.images[0]
def warmup_model(pipe, resolutions):
    """Generate three throw-away images per resolution to warm the pipeline up.

    Each resolution is run with the prompt "warmup", 9 inference steps,
    guidance scale 0.0 and seeds 42, 43 and 44. A failure is printed and the
    remaining runs for that resolution are skipped; later resolutions still run.

    Args:
        pipe: Pipeline handed to ``generate_image``.
        resolutions: Iterable of resolution labels.
    """
    print("Starting warmup phase...")

    for res_str in resolutions:
        print(f"Warming up for resolution: {res_str}")
        try:
            for warm_seed in (42, 43, 44):
                generate_image(
                    pipe,
                    prompt="warmup",
                    resolution=res_str,
                    num_inference_steps=9,
                    guidance_scale=0.0,
                    seed=warm_seed,
                )
        except Exception as e:
            print(f"Warmup failed for {res_str}: {e}")

    print("Warmup completed.")
# ==================== Prompt Expander ====================
@dataclass
class PromptOutput:
    """Result record produced by a prompt expander call."""

    # True when the expansion succeeded, False when it failed.
    status: bool
    # Expanded prompt on success; expanders in this file use "" on failure.
    prompt: str
    # Seed value passed to the expander, returned unchanged.
    seed: int
    # System prompt associated with the request.
    system_prompt: str
    # Raw model reply on success, error description on failure.
    message: str
class PromptExpander:
    """Base class for prompt expanders: stores the backend name and supplies the system prompt."""

    def __init__(self, backend="api", **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.backend = backend

    def decide_system_prompt(self, template_name=None):
        """Return the imported ``prompt_template``; *template_name* is not used."""
        return prompt_template
class APIPromptExpander(PromptExpander):
    """Prompt expander that calls an OpenAI-compatible chat-completions endpoint.

    ``api_config`` may contain "api_key" (falls back to DASHSCOPE_API_KEY),
    "base_url" (defaults to the DashScope compatible-mode URL) and "model"
    (defaults to "qwen3-max-preview").
    """

    def __init__(self, api_config=None, **kwargs):
        super().__init__(backend="api", **kwargs)
        self.api_config = api_config or {}
        # None when the client could not be created; extend() reports that case.
        self.client = self._init_api_client()

    def _init_api_client(self):
        """Create the OpenAI client, or return None (after printing why) when that is not possible."""
        try:
            # Imported lazily so the app still starts when ``openai`` is not installed.
            from openai import OpenAI

            api_key = self.api_config.get("api_key") or DASHSCOPE_API_KEY
            base_url = self.api_config.get("base_url", "https://dashscope.aliyuncs.com/compatible-mode/v1")
            if not api_key:
                print("Warning: DASHSCOPE_API_KEY not found.")
                return None
            return OpenAI(api_key=api_key, base_url=base_url)
        except ImportError:
            print("Please install openai: pip install openai")
            return None
        except Exception as e:
            print(f"Failed to initialize API client: {e}")
            return None

    def __call__(self, prompt, system_prompt=None, seed=-1, **kwargs):
        """Alias for :meth:`extend`."""
        return self.extend(prompt, system_prompt, seed, **kwargs)

    @staticmethod
    def _extract_revised_prompt(content):
        """Return the "revised_prompt" value from a ```json fenced block in *content*.

        Falls back to *content* itself when there is no fenced block, the block
        is not valid JSON, or the JSON is not an object. A missing key also
        yields *content*.
        """
        fence = "```json"
        start = content.find(fence)
        if start == -1:
            return content

        body_start = start + len(fence)
        end = content.find("```", body_start)
        # An unterminated fence gives end == -1; slicing with -1 would silently
        # drop the last character, so take everything after the opening fence.
        raw = content[body_start:] if end == -1 else content[body_start:end]

        try:
            data = json.loads(raw.strip())
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return content

        if isinstance(data, dict):
            return data.get("revised_prompt", content)
        return content

    def extend(self, prompt, system_prompt=None, seed=-1, **kwargs):
        """Expand *prompt* through the chat model.

        Args:
            prompt (str): User prompt to expand.
            system_prompt (str | None): System prompt; ``decide_system_prompt()``
                is used when None. If it contains "{prompt}", the user prompt is
                substituted into it with ``str.format`` and a single space is
                sent as the user message instead.
            seed (int): Returned unchanged in the result; not sent to the API.
            **kwargs: Ignored.

        Returns:
            PromptOutput: ``status=True`` with the expanded prompt and the raw
            reply in ``message``; ``status=False`` with an error text in
            ``message`` when the client is missing or the request fails.
        """
        if self.client is None:
            return PromptOutput(False, "", seed, system_prompt, "API client not initialized")

        if system_prompt is None:
            system_prompt = self.decide_system_prompt()

        if "{prompt}" in system_prompt:
            system_prompt = system_prompt.format(prompt=prompt)
            prompt = " "

        try:
            model = self.api_config.get("model", "qwen3-max-preview")
            response = self.client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.7,
                top_p=0.8,
            )
            content = response.choices[0].message.content
            expanded_prompt = self._extract_revised_prompt(content)
            return PromptOutput(
                status=True,
                prompt=expanded_prompt,
                seed=seed,
                system_prompt=system_prompt,
                message=content,
            )
        except Exception as e:
            # Best-effort boundary: any API or parsing failure is reported, not raised.
            return PromptOutput(False, "", seed, system_prompt, str(e))
def create_prompt_expander(backend="api", **kwargs):
    """Build the prompt expander for *backend*.

    Args:
        backend (str): Must be "api".
        **kwargs: Forwarded to ``APIPromptExpander``.

    Raises:
        ValueError: For any backend other than "api".
    """
    if backend != "api":
        raise ValueError("Only 'api' backend is supported.")
    return APIPromptExpander(**kwargs)
# Module-level singletons filled in by init_app(); each one stays None when its
# initialisation fails.
pipe = None
prompt_expander = None


def init_app():
    """Load the pipeline (with optional warmup) and create the prompt expander.

    Both steps are independent: an exception in either is printed and leaves
    the corresponding global set to None instead of propagating.
    """
    global pipe, prompt_expander

    try:
        pipe = load_models(MODEL_PATH, enable_compile=ENABLE_COMPILE, attention_backend=ATTENTION_BACKEND)
        print(f"Model loaded. Compile: {ENABLE_COMPILE}, Backend: {ATTENTION_BACKEND}")
        if ENABLE_WARMUP:
            # Warm up on every label of every resolution category.
            all_resolutions = [label for group in RES_CHOICES.values() for label in group]
            warmup_model(pipe, all_resolutions)
    except Exception as e:
        print(f"Error loading model: {e}")
        pipe = None

    try:
        prompt_expander = create_prompt_expander(backend="api", api_config={"model": "qwen3-max-preview"})
        print("Prompt expander initialized.")
    except Exception as e:
        print(f"Error initializing prompt expander: {e}")
        prompt_expander = None
def prompt_enhance(prompt, enable_enhance):
    """Optionally expand *prompt* with the global prompt expander.

    The user-facing status messages are Korean; they had been stored as
    mis-decoded text and are restored here.

    Args:
        prompt (str | None): Prompt typed by the user.
        enable_enhance (bool): When false, the prompt is returned untouched.

    Returns:
        tuple[str, str]: (prompt to use, status message). The original prompt is
        returned whenever enhancement is disabled, unavailable or fails; an
        empty string is returned when the prompt is missing or blank.
    """
    if not enable_enhance or not prompt_expander:
        # "Prompt enhancement is disabled or unavailable."
        return prompt, "프롬프트 향상이 비활성화되었거나 사용할 수 없습니다."

    # Guard against None as well as whitespace-only input.
    if not prompt or not prompt.strip():
        # "Please enter a prompt."
        return "", "프롬프트를 입력해주세요."

    try:
        result = prompt_expander(prompt)
    except Exception as e:
        # "Error: ..."
        return prompt, f"오류: {e}"

    if result.status:
        return result.prompt, result.message
    # "Enhancement failed: ..."
    return prompt, f"향상 실패: {result.message}"
@spaces.GPU
def generate(
    prompt,
    resolution="1024x1024 ( 1:1 )",
    seed=42,
    steps=9,
    shift=3.0,
    enhance=False,
    random_seed=True,
    gallery_images=None,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Generate an image using the Z-Image model based on the provided prompt and settings.

    This function is triggered when the user clicks the "Generate" button. It processes
    the input prompt (optionally enhancing it), configures generation parameters, and
    produces an image using the Z-Image diffusion transformer pipeline.

    Args:
        prompt (str): Text prompt describing the desired image content
        resolution (str): Output resolution in format "WIDTHxHEIGHT ( RATIO )" (e.g., "1024x1024 ( 1:1 )")
            valid options, 1024 category:
                - "1024x1024 ( 1:1 )"
                - "1152x896 ( 9:7 )"
                - "896x1152 ( 7:9 )"
                - "1152x864 ( 4:3 )"
                - "864x1152 ( 3:4 )"
                - "1248x832 ( 3:2 )"
                - "832x1248 ( 2:3 )"
                - "1280x720 ( 16:9 )"
                - "720x1280 ( 9:16 )"
                - "1344x576 ( 21:9 )"
                - "576x1344 ( 9:21 )"
            1280 category:
                - "1280x1280 ( 1:1 )"
                - "1440x1120 ( 9:7 )"
                - "1120x1440 ( 7:9 )"
                - "1472x1104 ( 4:3 )"
                - "1104x1472 ( 3:4 )"
                - "1536x1024 ( 3:2 )"
                - "1024x1536 ( 2:3 )"
                - "1600x896 ( 16:9 )"
                - "896x1600 ( 9:16 )"
                - "1680x720 ( 21:9 )"
                - "720x1680 ( 9:21 )"
        seed (int): Seed for reproducible generation; -1 (or no value) picks a random seed
        steps (int): Number of inference steps for the diffusion process (the pipeline is called with steps + 1)
        shift (float): Time shift parameter for the flow matching scheduler
        enhance (bool): Whether to enhance the prompt first (DISABLED! Do not use)
        random_seed (bool): Whether to generate a new random seed, if True will ignore the seed input
        gallery_images (list): List of previously generated images to append to (only needed for the Gradio UI)
        progress (gr.Progress): Gradio progress tracker for displaying generation progress (only needed for the Gradio UI)

    Returns:
        tuple: (gallery_images, seed_str, seed_int)
            - gallery_images: Updated list of generated images including the new image
            - seed_str: String representation of the seed used for generation
            - seed_int: Integer representation of the seed used for generation

    Raises:
        gr.Error: If the model pipeline has not been loaded
    """
    if pipe is None:
        # "The model has not been loaded."
        raise gr.Error("모델이 로드되지 않았습니다.")

    final_prompt = prompt
    if enhance:
        final_prompt, _ = prompt_enhance(prompt, True)
        print(f"Enhanced prompt: {final_prompt}")

    if random_seed or seed is None or seed == -1:
        new_seed = random.randint(1, 1000000)
    else:
        # The seed may arrive as a float (e.g. from an API client); the torch
        # generator needs an int.
        new_seed = int(seed)

    # Keep only the "WIDTHxHEIGHT" part of the label; anything that is not a
    # string falls back to the default size.
    if isinstance(resolution, str):
        resolution_str = resolution.split(" ")[0]
    else:
        resolution_str = "1024x1024"

    image = generate_image(
        pipe=pipe,
        prompt=final_prompt,
        resolution=resolution_str,
        seed=new_seed,
        guidance_scale=0.0,
        # NOTE(review): one more than the requested step count is passed on;
        # presumably the pipeline's last step does not run the transformer - confirm.
        num_inference_steps=int(steps + 1),
        shift=shift,
    )

    if gallery_images is None:
        gallery_images = []
    gallery_images.append(image)

    return gallery_images, str(new_seed), int(new_seed)
init_app()

# ==================== AoTI (Ahead of Time Inductor compilation) ====================
# init_app() leaves `pipe` as None when model loading fails. Skip the AoTI block
# loading in that case so the UI can still start and generate() can report the
# missing model, instead of crashing here with an AttributeError.
if pipe is not None:
    pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
    spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")
# Gradio UI. Labels are Korean; they had been stored as mis-decoded text and are
# restored here (English meaning given in the comments).
with gr.Blocks(title="Z-Image 데모") as demo:  # "Z-Image Demo"
    # Heading "Z-Image image generation demo" and tagline "An efficient image
    # generation foundation model using a single-stream diffusion transformer".
    gr.Markdown(
        """<div align="center">
# Z-Image 이미지 생성 데모
[![GitHub](https://img.shields.io/badge/GitHub-Z--Image-181717?logo=github&logoColor=white)](https://github.com/Tongyi-MAI/Z-Image)
*단일 스트림 디퓨전 트랜스포머를 사용한 효율적인 이미지 생성 기반 모델*
</div>"""
    )

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            # "Prompt" / "Enter a prompt..."
            prompt_input = gr.Textbox(label="프롬프트", lines=3, placeholder="프롬프트를 입력하세요...")

            # PE components (Temporarily disabled)
            # with gr.Row():
            #     enable_enhance = gr.Checkbox(label="프롬프트 향상 (DashScope)", value=False)  # "Prompt enhancement"
            #     enhance_btn = gr.Button("향상만 실행")  # "Enhance only"

            with gr.Row():
                choices = [int(k) for k in RES_CHOICES.keys()]
                # "Resolution category"
                res_cat = gr.Dropdown(value=1024, choices=choices, label="해상도 카테고리")
                initial_res_choices = RES_CHOICES["1024"]
                # "Width x Height (ratio)". The full label set is offered initially;
                # update_res_choices narrows it when the category changes.
                resolution = gr.Dropdown(
                    value=initial_res_choices[0], choices=RESOLUTION_SET, label="너비 x 높이 (비율)"
                )

            with gr.Row():
                seed = gr.Number(label="시드", value=42, precision=0)  # "Seed"
                random_seed = gr.Checkbox(label="랜덤 시드", value=True)  # "Random seed"

            with gr.Row():
                # "Steps" (read-only) and "Time shift"
                steps = gr.Slider(label="스텝 수", minimum=1, maximum=100, value=8, step=1, interactive=False)
                shift = gr.Slider(label="시간 이동", minimum=1.0, maximum=10.0, value=3.0, step=0.1)

            generate_btn = gr.Button("생성", variant="primary")  # "Generate"

            # Example prompts
            gr.Markdown("### 📝 예제 프롬프트")  # "Example prompts"
            gr.Examples(examples=EXAMPLE_PROMPTS, inputs=prompt_input, label=None)

        # Right column: outputs.
        with gr.Column(scale=1):
            # "Generated images"
            output_gallery = gr.Gallery(
                label="생성된 이미지",
                columns=2,
                rows=2,
                height=600,
                object_fit="contain",
                format="png",
                interactive=False,
            )
            used_seed = gr.Textbox(label="사용된 시드", interactive=False)  # "Seed used"

    def update_res_choices(_res_cat):
        """Return a dropdown update listing the resolutions of the chosen category.

        Unknown categories fall back to the "1024" list; the first entry becomes
        the selected value.
        """
        if str(_res_cat) in RES_CHOICES:
            res_choices = RES_CHOICES[str(_res_cat)]
        else:
            res_choices = RES_CHOICES["1024"]
        return gr.update(value=res_choices[0], choices=res_choices)

    res_cat.change(update_res_choices, inputs=res_cat, outputs=resolution, api_visibility="private")

    # PE enhancement button (Temporarily disabled)
    # enhance_btn.click(
    #     prompt_enhance,
    #     inputs=[prompt_input, enable_enhance],
    #     outputs=[prompt_input, final_prompt_output]
    # )

    # Dummy enable_enhance variable set to False
    enable_enhance = gr.State(value=False)

    generate_btn.click(
        generate,
        inputs=[prompt_input, resolution, seed, steps, shift, enable_enhance, random_seed, output_gallery],
        outputs=[output_gallery, used_seed, seed],
        api_visibility="public",
    )
# Caps the width of elements carrying the "fillable" class at 1230px.
css = "\n.fillable{max-width: 1230px !important}\n"

if __name__ == "__main__":
    demo.launch(css=css, mcp_server=True)