|
|
try:
    import spaces
except ImportError:
    class spaces:
        """No-op stand-in used when the ``spaces`` package cannot be imported.

        Only ``GPU`` is provided, so ``@spaces.GPU`` decorations in this file
        still resolve; decorated functions are returned untouched.
        """

        @staticmethod
        def GPU(fn=None, **_options):
            """Return ``fn`` unchanged, as a bare or a parameterised decorator.

            Args:
                fn: The function being decorated, or ``None`` when the
                    decorator is written with a call, e.g. ``@spaces.GPU()``.
                **_options: Keyword options; accepted and ignored.

            Returns:
                ``fn`` itself, or an identity decorator when ``fn`` is ``None``.
            """
            if fn is None:
                # Written as ``@spaces.GPU(...)``: hand back a decorator that
                # leaves its target untouched.
                return lambda wrapped: wrapped
            return fn
|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import imageio |
|
|
|
|
|
import torch |
|
|
from diffusers import AutoencoderKLWan |
|
|
from vsfwan.pipeline import WanPipeline |
|
|
from vsfwan.processor import WanAttnProcessor2_0 |
|
|
from diffusers import WanVACEPipeline |
|
|
import uuid |
|
|
|
|
|
import sys |
|
|
import os |
|
|
# Hugging Face Hub id of the checkpoint; both pipelines below load from it.
model_id = "stabilityai/stable-diffusion-3.5-large-turbo"

from src.sd3_pipeline import VSFStableDiffusion3Pipeline

# Pipeline driven with the `scale` / `offset` arguments in `generate_image`.
# `token` is the keyword diffusers' `from_pretrained` uses for Hub
# authentication (the previous `hf_token` keyword is not one it reads).
# NOTE(review): assumes VSFStableDiffusion3Pipeline inherits diffusers'
# `from_pretrained` without overriding it — confirm against src/sd3_pipeline.
pipe = VSFStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    token=os.environ.get("HF_TOKEN"),
)

from nag import NAGStableDiffusion3Pipeline

# Pipeline driven with the `nag_*` arguments in `generate_image`; it is also
# reused there with `nag_scale=0.0` for the "normal" comparison image.
nag_pipe = NAGStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
)
|
|
|
|
|
|
|
|
from huggingface_hub import CommitScheduler, InferenceClient |
|
|
|
|
|
from PIL import Image |
|
|
import json |
|
|
from datetime import datetime |
|
|
from pathlib import Path |
|
|
from uuid import uuid4 |
|
|
|
|
|
# Local folder for this process's logged generations. The last path component
# is a fresh uuid4, so every start of the app writes into its own folder.
IMAGE_DATASET_DIR = Path("image_dataset", "train", str(uuid4()))
IMAGE_DATASET_DIR.mkdir(parents=True, exist_ok=True)

# One JSON object per line is appended here by `save_image`.
IMAGE_JSONL_PATH = IMAGE_DATASET_DIR.joinpath("metadata.jsonl")

# Background uploader for the folder above. Inside the dataset repo the files
# are placed under a directory named after the uuid folder.
scheduler = CommitScheduler(
    repo_type="dataset",
    repo_id="weathon/vsf-log",
    path_in_repo=IMAGE_DATASET_DIR.name,
    folder_path=IMAGE_DATASET_DIR,
    every=1,  # commit interval; huggingface_hub documents this in minutes
)
|
|
|
|
|
def save_image(prompt: str, negative_prompt: str, img_vsf: Image, img_nag: Image, img_normal: Image, parameters: dict) -> None:
    """Write the three generated images and one metadata line to the dataset folder.

    Args:
        prompt: Positive prompt used for the generation.
        negative_prompt: Negative prompt used for the generation.
        img_vsf: Image saved with the ``_vsf.png`` suffix.
        img_nag: Image saved with the ``_nag.png`` suffix.
        img_normal: Image saved with the ``_normal.png`` suffix.
        parameters: Generation settings, stored verbatim under ``"parameters"``.
    """
    images = {"vsf": img_vsf, "nag": img_nag, "normal": img_normal}
    # Every file gets its own freshly drawn uuid4 as the name stem.
    targets = {tag: IMAGE_DATASET_DIR / f"{uuid4()}_{tag}.png" for tag in images}

    record = {"prompt": prompt, "negative_prompt": negative_prompt}
    for tag, target in targets.items():
        record[f"{tag}_image_path"] = str(target)
    record["parameters"] = parameters

    # All writes happen while holding the scheduler's lock.
    with scheduler.lock:
        for tag, image in images.items():
            image.save(targets[tag])
        with IMAGE_JSONL_PATH.open("a") as log_file:
            record["timestamp"] = datetime.utcnow().isoformat()
            json.dump(record, log_file)
            log_file.write("\n")
|
|
|
|
|
|
|
|
# Move both pipelines to the CUDA device at import time; the objects returned
# by `.to()` replace the module-level references used by `generate_image`.
_PIPELINE_DEVICE = "cuda"
pipe = pipe.to(_PIPELINE_DEVICE)
nag_pipe = nag_pipe.to(_PIPELINE_DEVICE)
|
|
import os |
|
|
def _seeded_cuda_generator(seed):
    """Return a CUDA ``torch.Generator`` seeded with ``seed``."""
    return torch.Generator(device="cuda").manual_seed(seed)


def _save_to_images_dir(image, suffix=""):
    """Save ``image`` as ``images/<random hex><suffix>.png`` and return that path."""
    os.makedirs("images", exist_ok=True)
    path = f"images/{uuid.uuid4().hex}{suffix}.png"
    image.save(path)
    return path


@spaces.GPU
def generate_image(positive_prompt, negative_prompt, guidance_scale, bias, step, seed, nag_guidance, nag_alpha, nag_tau, nag_step, progress=gr.Progress(track_tqdm=False)):
    """Generate the VSF, NAG and unguided images for one prompt pair.

    All three runs use the same seed. Each image is written under ``images/``
    and the whole generation is logged through ``save_image``.

    Args:
        positive_prompt: Prompt passed as ``prompt`` to every pipeline call.
        negative_prompt: Prompt passed as ``negative_prompt`` to every call.
        guidance_scale: Forwarded to ``pipe`` as ``scale``.
        bias: Forwarded to ``pipe`` as ``offset``.
        step: Inference steps for the ``pipe`` run.
        seed: Seed for every generator; ``None`` is treated as 0.
        nag_guidance: Forwarded to ``nag_pipe`` as ``nag_scale``.
        nag_alpha: Forwarded to ``nag_pipe`` as ``nag_alpha``.
        nag_tau: Forwarded to ``nag_pipe`` as ``nag_tau``.
        nag_step: Inference steps for both ``nag_pipe`` runs.
        progress: Gradio progress handle; not used inside the body.

    Returns:
        Tuple ``(vsf_path, nag_path, normal_path)`` of the saved PNG paths.
    """
    # UI widgets may deliver floats, or None for a cleared number box, while
    # `manual_seed` and the step counts need plain ints.
    seed = 0 if seed is None else int(seed)
    step = int(step)
    nag_step = int(nag_step)

    print(f"Generating image with params: {positive_prompt}, {negative_prompt}, {guidance_scale}, {bias}, {step}")

    # VSF run: classifier-free guidance is off (guidance_scale=0.0); the
    # user-facing "guidance" and "bias" go in as `scale` and `offset`.
    output = pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=step,
        guidance_scale=0.0,
        scale=guidance_scale,
        offset=bias,
        generator=_seeded_cuda_generator(seed),
    ).images[0]
    output_path = _save_to_images_dir(output)
    print(f"Image saved to {output_path}")

    # NAG run with the user-selected NAG settings.
    output_nag = nag_pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=nag_step,
        nag_scale=nag_guidance,
        nag_alpha=nag_alpha,
        nag_tau=nag_tau,
        guidance_scale=0.0,
        generator=_seeded_cuda_generator(seed),
    ).images[0]
    nag_path = _save_to_images_dir(output_nag, "_nag")

    # Comparison run: same pipeline with nag_scale=0.0 and guidance_scale=0.0.
    output_normal = nag_pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=nag_step,
        nag_scale=0.0,
        guidance_scale=0.0,
        generator=_seeded_cuda_generator(seed),
    ).images[0]
    normal_path = _save_to_images_dir(output_normal, "_normal")

    print(f"NAG Image saved to {nag_path}")
    save_image(positive_prompt, negative_prompt, output, output_nag, output_normal, {
        "guidance_scale": guidance_scale,
        "bias": bias,
        "step": step,
        "seed": seed,
        "nag_guidance": nag_guidance,
        "nag_alpha": nag_alpha,
        "nag_tau": nag_tau,
        "nag_step": nag_step,
    })
    return output_path, nag_path, normal_path
|
|
|
|
|
import json |
|
|
# Read the pool of sample prompts once, at import time; `load_sample` draws
# from it.
with open("sample_prompts.json", "r") as sample_file:
    sample_prompts = json.loads(sample_file.read())
|
|
|
|
|
def load_sample():
    """Pick a random entry of ``sample_prompts``.

    Returns:
        Tuple of the entry's ``'prompt'`` and ``'missing_element'`` values.
    """
    chosen = np.random.choice(sample_prompts)
    return tuple(chosen[key] for key in ('prompt', 'missing_element'))
|
|
|
|
|
# Read the pool of anti-aesthetic prompts once, at import time;
# `load_anti_aesthetics_sample` draws from it.
with open("anti_aesthetics.json", "r") as anti_aesthetics_file:
    anti_aesthetics_prompts = json.loads(anti_aesthetics_file.read())
|
|
|
|
|
def load_anti_aesthetics_sample():
    """Pick a random entry of ``anti_aesthetics_prompts``.

    Returns:
        Tuple of the entry's ``'prompt'`` and ``'missing_element'`` values.
    """
    chosen = np.random.choice(anti_aesthetics_prompts)
    return tuple(chosen[key] for key in ('prompt', 'missing_element'))
|
|
|
|
|
# Subjects and art media that `load_abstract_prompt` combines at random.
nouns = ["cat", "dog", "car", "bicycle", "tree", "house", "computer", "phone", "book", "chair", "table", "lamp", "flower", "mountain", "river", "ocean", "cloud", "bird", "fish", "butterfly", "sun", "moon", "planet", "robot", "alien", "castle", "dragon", "unicorn", "fairy", "wizard"]
methods = ["painting", "sketch", "drawing", "watercolor", "oil painting", "low poly art", "color pencil drawing", "crayon art", "ink wash", "paper cutout", "line art"]


def load_abstract_prompt():
    """Build a random "abstract <medium> of a/an <noun>" prompt pair.

    Returns:
        Tuple ``(prompt, negative)``: ``prompt`` is the sentence built from a
        random entry of ``methods`` and of ``nouns``; ``negative`` is the
        chosen noun on its own.
    """
    noun = str(np.random.choice(nouns))
    method = str(np.random.choice(methods))
    # Use "an" before vowel-initial nouns ("alien", "ocean"). "u" is left out
    # on purpose: the only u-initial noun in the list, "unicorn", takes "a".
    article = "an" if noun.startswith(("a", "e", "i", "o")) else "a"
    prompt = f"An abstract {method} of {article} {noun}."
    return prompt, noun
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json |
|
|
from datetime import datetime |
|
|
from pathlib import Path |
|
|
from uuid import uuid4 |
|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
# Gradio UI. Components are created in the order they appear on the page, and
# event handlers are registered in the original order.
# NOTE(review): the nesting of rows below was reconstructed from blank-line
# grouping — confirm that the "Generate" button belongs outside the image row.
with gr.Blocks(title="Value Sign Flip SD3.5 Demo") as demo:
    # Header, logging notice, acknowledgements and background.
    gr.Markdown("# Value Sign Flip SD3.5 Demo \n\n This demo is based on SD3.5-L-Turbo model and uses Value Sign Flip technique to generate images with different guidance scales and biases. More on [GitHub](https://github.com/weathon/VSF/blob/main/wan.md) and [ArXiv](https://arxiv.org/pdf/2508.10931) \n\nPositive prompt should be at least 1 sentence long or the results will be weird. ")
    gr.Markdown("To help with further research, all generations will be logged anonymously. If you do not wish to participate, please do not use the demo. Please keep prompts safe for work and non-offensive. ")
    gr.Markdown("This project is supported by [Lambda Cloud](https://lambda.ai/), [Weathon Software](https://weasoft.com), and [Canada Foundation for Innovation](https://www.innovation.ca/). ")
    gr.Markdown("Using negative prompts for abstract arts and anti-aesthetics arts are also one of our contributions and can be applied to either VSF or NAG techniques. Details in paper appendix N or [this paper](https://www.researchgate.net/publication/397595102).")

    # Prompt inputs.
    with gr.Row(min_height=200):
        pos = gr.Textbox(label="Positive Prompt", value="A polished bicycle frame leans against a weathered brick wall under soft morning light.", lines=5)
        neg = gr.Textbox(label="Negative Prompt", value="wheels", lines=5)

    # Buttons that fill both prompt boxes from the loader functions.
    with gr.Row():
        sample = gr.Button("Load A Sample Prompt (Uncurated)")
        sample.click(fn=load_sample, inputs=[], outputs=[pos, neg])
        anti_aesthetic_sample = gr.Button("Load An Anti-Aesthetic Sample Prompt (Could Cause Unease)")
        anti_aesthetic_sample.click(fn=load_anti_aesthetics_sample, inputs=[], outputs=[pos, neg])
        abstract_sample = gr.Button("Load An Abstract Prompt")
        abstract_sample.click(fn=load_abstract_prompt, inputs=[], outputs=[pos, neg])

    # Seed, shared by all three generations.
    with gr.Row():
        seed = gr.Number(label="Seed", value=0, precision=0)
        randomize_seed = gr.Button("Randomize Seed")

    # VSF sliders and presets; each preset writes (guidance, bias).
    with gr.Row():
        gr.Markdown("## VSF Generation Parameters")
        guidance = gr.Slider(0, 5, step=0.1, label="Guidance Scale", value=3.0)
        bias = gr.Slider(0, 0.5, step=0.01, label="Bias", value=0.1)
        step = gr.Slider(4, 15, step=1, label="Step", value=6)
        randomize_seed.click(fn=lambda: np.random.randint(0, 1000000), inputs=[], outputs=[seed])
        set_strong_vsf = gr.Button("Strong")
        set_strong_vsf.click(fn=lambda: (3.8, 0.2), inputs=[], outputs=[guidance, bias])
        set_normal_vsf = gr.Button("Normal")
        set_normal_vsf.click(fn=lambda: (3.3, 0.2), inputs=[], outputs=[guidance, bias])
        set_mild_vsf = gr.Button("Quality")
        set_mild_vsf.click(fn=lambda: (2.5, 0.2), inputs=[], outputs=[guidance, bias])

    # NAG sliders and presets; each preset writes (guidance, alpha, tau).
    with gr.Row():
        gr.Markdown("## NAG Generation Parameters")
        nag_guidance = gr.Slider(1, 12, step=0.1, label="Guidance Scale", value=5)
        nag_alpha = gr.Slider(0.1, 1.0, step=0.01, label="Alpha", value=0.25)
        nag_tau = gr.Slider(1, 10, step=0.01, label="Tau", value=3.0)
        nag_step = gr.Slider(4, 15, step=1, label="Step", value=6)
        set_strong = gr.Button("Strong")
        set_strong.click(fn=lambda: (11, 0.5, 5.0), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
        set_normal = gr.Button("Normal")
        set_normal.click(fn=lambda: (7, 0.25, 3.5), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
        set_mild = gr.Button("Quality")
        set_mild.click(fn=lambda: (4, 0.125, 2.5), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])

    # Output images, in the order `generate_image` returns their paths.
    with gr.Row():
        vsf_out = gr.Image(label="VSF Generated Image")
        nag_out = gr.Image(label="NAG Generated Image")
        normal_out = gr.Image(label="Without Negative Guidance")

    btn = gr.Button("Generate")
    btn.click(fn=generate_image, inputs=[pos, neg, guidance, bias, step, seed, nag_guidance, nag_alpha, nag_tau, nag_step], outputs=[vsf_out, nag_out, normal_out])

demo.launch(share=True)