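# Use the `spaces` GPU decorator when running on Hugging Face Spaces (ZeroGPU);
# fall back to a no-op stub so the script also runs on a plain local GPU box.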
try:
    import spaces
except ImportError:
    class spaces:
        @staticmethod
        def GPU(fn):
            return fn
import gradio as gr
import numpy as np
import torch
import uuid
import os
model_id = "stabilityai/stable-diffusion-3.5-large-turbo"

from src.sd3_pipeline import VSFStableDiffusion3Pipeline

pipe = VSFStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    token=os.environ.get("HF_TOKEN", None),
)

from nag import NAGStableDiffusion3Pipeline

nag_pipe = NAGStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
)
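# Both pipelines load the same SD3.5-L-Turbo weights, so VSF and NAG outputs
# are directly comparable: VSF is this repo's pipeline, NAG is the baseline.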
from huggingface_hub import CommitScheduler, InferenceClient
from PIL import Image
import json
from datetime import datetime
from pathlib import Path
from uuid import uuid4
IMAGE_DATASET_DIR = Path("image_dataset") / f"train/{uuid4()}"
IMAGE_DATASET_DIR.mkdir(parents=True, exist_ok=True)
IMAGE_JSONL_PATH = IMAGE_DATASET_DIR / "metadata.jsonl"
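# Log every generation (images + parameters) and push it in the background to
# the `weathon/vsf-log` dataset repo on the Hub.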
scheduler = CommitScheduler(
    repo_id="weathon/vsf-log",
    repo_type="dataset",
    folder_path=IMAGE_DATASET_DIR,
    path_in_repo=IMAGE_DATASET_DIR.name,
    every=1,  # `every` is in minutes
)
def save_image(prompt: str, negative_prompt: str, img_vsf: Image.Image, img_nag: Image.Image, img_normal: Image.Image, parameters: dict) -> None:
    vsf_image_path = IMAGE_DATASET_DIR / f"{uuid4()}_vsf.png"
    nag_image_path = IMAGE_DATASET_DIR / f"{uuid4()}_nag.png"
    normal_image_path = IMAGE_DATASET_DIR / f"{uuid4()}_normal.png"
    # Hold the scheduler lock so a background commit does not fire mid-write.
    with scheduler.lock:
        img_vsf.save(vsf_image_path)
        img_nag.save(nag_image_path)
        img_normal.save(normal_image_path)
        with IMAGE_JSONL_PATH.open("a") as f:
            json.dump({
                "prompt": prompt,
                "negative_prompt": negative_prompt,
                "vsf_image_path": str(vsf_image_path),
                "nag_image_path": str(nag_image_path),
                "normal_image_path": str(normal_image_path),
                "parameters": parameters,
                "timestamp": datetime.utcnow().isoformat(),
            }, f)
            f.write("\n")
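# Each call appends one JSONL record, e.g.:
# {"prompt": "...", "negative_prompt": "wheels",
#  "vsf_image_path": "image_dataset/train/<run-id>/<uuid>_vsf.png", ...,
#  "parameters": {...}, "timestamp": "..."}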
pipe = pipe.to("cuda")
nag_pipe = nag_pipe.to("cuda")
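# Generate the same prompt three ways with a shared seed: VSF, NAG, and a
# control with no negative guidance at all, so the three images differ only in
# how the negative prompt is applied.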
@spaces.GPU
def generate_image(positive_prompt, negative_prompt, guidance_scale, bias, step, seed, nag_guidance, nag_alpha, nag_tau, nag_step, progress=gr.Progress(track_tqdm=False)):
    global pipe, nag_pipe
    print(f"Generating image with params: {positive_prompt}, {negative_prompt}, {guidance_scale}, {bias}, {step}")
    # VSF: classifier-free guidance stays off (guidance_scale=0.0, as required
    # by the turbo model); negative guidance comes from `scale` and `offset`.
    output = pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=step,
        guidance_scale=0.0,
        scale=guidance_scale,
        offset=bias,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]
    os.makedirs("images", exist_ok=True)
    output_path = f"images/{uuid.uuid4().hex}.png"
    output.save(output_path)
    print(f"Image saved to {output_path}")
    # NAG comparison image with the same seed.
    output_nag = nag_pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=nag_step,
        nag_scale=nag_guidance,
        nag_alpha=nag_alpha,
        nag_tau=nag_tau,
        guidance_scale=0.0,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]
    nag_path = f"images/{uuid.uuid4().hex}_nag.png"
    output_nag.save(nag_path)
    # Control image: nag_scale=0.0 disables negative guidance entirely.
    output_normal = nag_pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=nag_step,
        nag_scale=0.0,
        guidance_scale=0.0,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]
    normal_path = f"images/{uuid.uuid4().hex}_normal.png"
    output_normal.save(normal_path)
    print(f"NAG Image saved to {nag_path}")
    save_image(positive_prompt, negative_prompt, output, output_nag, output_normal, {
        "guidance_scale": guidance_scale,
        "bias": bias,
        "step": step,
        "seed": seed,
        "nag_guidance": nag_guidance,
        "nag_alpha": nag_alpha,
        "nag_tau": nag_tau,
        "nag_step": nag_step,
    })
    return output_path, nag_path, normal_path
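# A minimal sketch, NOT the repo's actual implementation, of one reading of the
# value-sign-flip idea: negative-prompt tokens join the attention context with
# their value vectors negated, so attention mass on them steers the output
# *away* from the negative concept. `scale` and `offset` here mirror the demo's
# "Guidance Scale" and "Bias" sliders, but how the real pipeline applies them
# is an assumption.
import torch.nn.functional as F

def vsf_attention_sketch(q, k_pos, v_pos, k_neg, v_neg, scale=3.0, offset=0.1):
    n_pos = k_pos.shape[1]
    k = torch.cat([k_pos, k_neg], dim=1)
    v = torch.cat([v_pos, -scale * v_neg], dim=1)       # sign flip on negative values
    logits = q @ k.transpose(-2, -1) / (q.shape[-1] ** 0.5)
    logits = logits + torch.cat([
        torch.zeros_like(logits[..., :n_pos]),
        torch.full_like(logits[..., n_pos:], offset),   # bias toward negative tokens
    ], dim=-1)
    return F.softmax(logits, dim=-1) @ v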
with open("sample_prompts.json", "r") as f:
    sample_prompts = json.load(f)
def load_sample():
    sample = np.random.choice(sample_prompts)
    return sample['prompt'], sample['missing_element']
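# Each JSON entry pairs a full scene description ('prompt') with the object
# that should be absent ('missing_element'), which the UI drops straight into
# the negative-prompt box.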
with open("anti_aesthetics.json", "r") as f:
anti_aesthetics_prompts = json.load(f)
def load_anti_aesthetics_sample():
sample = np.random.choice(anti_aesthetics_prompts)
return sample['prompt'], sample['missing_element']
nouns = ["cat", "dog", "car", "bicycle", "tree", "house", "computer", "phone", "book", "chair", "table", "lamp", "flower", "mountain", "river", "ocean", "cloud", "bird", "fish", "butterfly", "sun", "moon", "planet", "robot", "alien", "castle", "dragon", "unicorn", "fairy", "wizard"]
methods = ["painting", "sketch", "drawing", "watercolor", "oil painting", "low poly art", "color pencil drawing", "crayon art", "ink wash", "paper cutout", "line art"]
def load_abstract_prompt():
    noun = np.random.choice(nouns)
    method = np.random.choice(methods)
    prompt = f"An abstract {method} of a {noun}."
    negative = noun
    return prompt, negative
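# Example pair: ("An abstract watercolor of a dragon.", "dragon"). Negating the
# subject noun pushes the image toward pure abstraction (see the paper note in
# the UI text below).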
# def rephrase_prompt(pos_prompt, neg_prompt):
#     completion = client.chat.completions.create(
#         extra_headers={
#             "HTTP-Referer": "<YOUR_SITE_URL>",  # Optional. Site URL for rankings on openrouter.ai.
#             "X-Title": "<YOUR_SITE_NAME>",  # Optional. Site title for rankings on openrouter.ai.
#         },
#         extra_body={},
#         model="qwen/qwen3-vl-235b-a22b-instruct",
#         messages=[
#             {
#                 "role": "user",
#                 "content": "Rephrase the following prompt to one sentence for the positive prompt and a few words for the negative prompt.\n\nOriginal Prompt: {}\n\nNegative Element: {}.\nMake sure the generated prompt follows the positive-negative prompt pair; do not mention the negative prompt in the positive one.".format(pos_prompt, neg_prompt)
#             }
#         ]
#     )
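# Gradio UI: prompt boxes, sample-prompt loaders, per-method preset buttons,
# and a side-by-side comparison of the three generated images.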
with gr.Blocks(title="Value Sign Flip SD3.5 Demo") as demo:
    gr.Markdown("# Value Sign Flip SD3.5 Demo \n\n This demo is based on the SD3.5-L-Turbo model and uses the Value Sign Flip technique to generate images with different guidance scales and biases. More on [GitHub](https://github.com/weathon/VSF/blob/main/wan.md) and [ArXiv](https://arxiv.org/pdf/2508.10931). \n\nThe positive prompt should be at least one sentence long, or the results will be weird.")
    gr.Markdown("To help with further research, all generations will be logged anonymously. If you do not wish to participate, please do not use the demo. Please keep prompts safe for work and non-offensive.")
    gr.Markdown("This project is supported by [Lambda Cloud](https://lambda.ai/), [Weathon Software](https://weasoft.com), and the [Canada Foundation for Innovation](https://www.innovation.ca/).")
    gr.Markdown("Using negative prompts for abstract art and anti-aesthetic art is also one of our contributions and can be applied to either the VSF or NAG technique. Details are in Appendix N of the paper or [this paper](https://www.researchgate.net/publication/397595102).")
    # gr.Markdown("# Value Sign Flip Wan 2.1 Demo \n\n This demo is based on Wan 2.1 T2V model and uses Value Sign Flip technique to generate images with different guidance scales and biases. More on [GitHub](https://github.com/weathon/VSF/blob/main/wan.md)\n\nPositive prompt should be at least 2 sentence long or the results will be weird.")
    with gr.Row(min_height=200):
        pos = gr.Textbox(label="Positive Prompt", value="A polished bicycle frame leans against a weathered brick wall under soft morning light.", lines=5)
        neg = gr.Textbox(label="Negative Prompt", value="wheels", lines=5)
        # rephrase = gr.Button("Rephrase Prompt")
    with gr.Row():
        sample = gr.Button("Load A Sample Prompt (Uncurated)")
        sample.click(fn=load_sample, inputs=[], outputs=[pos, neg])
        anti_aesthetic_sample = gr.Button("Load An Anti-Aesthetic Sample Prompt (Could Cause Unease)")
        anti_aesthetic_sample.click(fn=load_anti_aesthetics_sample, inputs=[], outputs=[pos, neg])
        abstract_sample = gr.Button("Load An Abstract Prompt")
        abstract_sample.click(fn=load_abstract_prompt, inputs=[], outputs=[pos, neg])
    with gr.Row():
        seed = gr.Number(label="Seed", value=0, precision=0)
        randomize_seed = gr.Button("Randomize Seed")
    with gr.Row():
        gr.Markdown("## VSF Generation Parameters")
        guidance = gr.Slider(0, 5, step=0.1, label="Guidance Scale", value=3.0)
        bias = gr.Slider(0, 0.5, step=0.01, label="Bias", value=0.1)
        step = gr.Slider(4, 15, step=1, label="Step", value=6)
        randomize_seed.click(fn=lambda: np.random.randint(0, 1000000), inputs=[], outputs=[seed])
        set_strong_vsf = gr.Button("Strong")
        set_strong_vsf.click(fn=lambda: (3.8, 0.2), inputs=[], outputs=[guidance, bias])
        set_normal_vsf = gr.Button("Normal")
        set_normal_vsf.click(fn=lambda: (3.3, 0.2), inputs=[], outputs=[guidance, bias])
        set_mild_vsf = gr.Button("Quality")
        set_mild_vsf.click(fn=lambda: (2.5, 0.2), inputs=[], outputs=[guidance, bias])
    with gr.Row():
        gr.Markdown("## NAG Generation Parameters")
        nag_guidance = gr.Slider(1, 12, step=0.1, label="Guidance Scale", value=5)
        nag_alpha = gr.Slider(0.1, 1.0, step=0.01, label="Alpha", value=0.25)
        nag_tau = gr.Slider(1, 10, step=0.01, label="Tau", value=3.0)
        nag_step = gr.Slider(4, 15, step=1, label="Step", value=6)
        set_strong = gr.Button("Strong")
        set_strong.click(fn=lambda: (11, 0.5, 5.0), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
        set_normal = gr.Button("Normal")
        set_normal.click(fn=lambda: (7, 0.25, 3.5), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
        set_mild = gr.Button("Quality")
        set_mild.click(fn=lambda: (4, 0.125, 2.5), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
    with gr.Row():
        vsf_out = gr.Image(label="VSF Generated Image")
        nag_out = gr.Image(label="NAG Generated Image")
        normal_out = gr.Image(label="Without Negative Guidance")
    btn = gr.Button("Generate")
    btn.click(fn=generate_image, inputs=[pos, neg, guidance, bias, step, seed, nag_guidance, nag_alpha, nag_tau, nag_step], outputs=[vsf_out, nag_out, normal_out])

demo.launch(share=True)