# Fall back to a no-op GPU decorator when the `spaces` package (Hugging Face
# Spaces ZeroGPU) is not available, e.g. when running locally.
try:
    import spaces
except ImportError:
    class spaces:
        @staticmethod
        def GPU(fn):
            return fn

import os
import uuid

import gradio as gr
import numpy as np
import torch

model_id = "stabilityai/stable-diffusion-3.5-large-turbo"
from src.sd3_pipeline import VSFStableDiffusion3Pipeline
pipe = VSFStableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large-turbo",
    torch_dtype=torch.bfloat16,
    hf_token=os.environ.get("HF_TOKEN", None)
) 

# Load the NAG baseline pipeline used for side-by-side comparison.
from nag import NAGStableDiffusion3Pipeline

nag_pipe = NAGStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    token=os.environ.get("HF_TOKEN", None),
)


from huggingface_hub import CommitScheduler, InferenceClient

from PIL import Image
import json
from datetime import datetime
from pathlib import Path
from uuid import uuid4

IMAGE_DATASET_DIR = Path("image_dataset") / f"train-{uuid4()}"
IMAGE_DATASET_DIR.mkdir(parents=True, exist_ok=True)
IMAGE_JSONL_PATH = IMAGE_DATASET_DIR / "metadata.jsonl"

# Commit the staged folder to the weathon/vsf-log dataset repo every minute.
scheduler = CommitScheduler(
    repo_id="weathon/vsf-log",
    repo_type="dataset",
    folder_path=IMAGE_DATASET_DIR,
    path_in_repo=IMAGE_DATASET_DIR.name,
    every=1,
)

def save_image(prompt: str, negative_prompt: str, img_vsf: Image.Image, img_nag: Image.Image, parameters: dict) -> None:
    """Log a VSF/NAG image pair and its generation metadata to the dataset folder."""
    vsf_image_path = IMAGE_DATASET_DIR / f"{uuid4()}_vsf.png"
    nag_image_path = IMAGE_DATASET_DIR / f"{uuid4()}_nag.png"

    # Hold the scheduler lock so a commit never captures a half-written file.
    with scheduler.lock:
        img_vsf.save(vsf_image_path)
        img_nag.save(nag_image_path)
        with IMAGE_JSONL_PATH.open("a") as f:
            record = {
                "prompt": prompt,
                "negative_prompt": negative_prompt,
                "vsf_image_path": str(vsf_image_path),
                "nag_image_path": str(nag_image_path),
                "parameters": parameters,
                "timestamp": datetime.utcnow().isoformat(),
            }
            f.write(json.dumps(record) + "\n")


pipe = pipe.to("cuda")
nag_pipe = nag_pipe.to("cuda")
import os
@spaces.GPU
def generate_video(positive_prompt, negative_prompt, guidance_scale, bias, step, seed,
                   nag_guidance, nag_alpha, nag_tau, nag_step,
                   progress=gr.Progress(track_tqdm=False)):
    """Generate one image with the VSF pipeline and one with the NAG pipeline."""
    global pipe, nag_pipe

    print(f"Generating image with params: {positive_prompt}, {negative_prompt}, {guidance_scale}, {bias}, {step}")

    # VSF generation: standard CFG is disabled (guidance_scale=0.0); the
    # VSF-specific `scale` and `offset` arguments carry the guidance strength and bias.
    output = pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=step,
        guidance_scale=0.0,
        scale=guidance_scale,
        offset=bias,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]
    os.makedirs("images", exist_ok=True)
    output_path = f"images/{uuid.uuid4().hex}.png"
    output.save(output_path)
    print(f"Image saved to {output_path}")

    # NAG generation with the same prompts and seed so the two results are comparable.
    output_nag = nag_pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=nag_step,
        nag_scale=nag_guidance,
        nag_alpha=nag_alpha,
        nag_tau=nag_tau,
        guidance_scale=0.0,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]
    nag_path = f"images/{uuid.uuid4().hex}_nag.png"
    output_nag.save(nag_path)
    print(f"NAG Image saved to {nag_path}")
    save_image(positive_prompt, negative_prompt, output, output_nag, {
        "guidance_scale": guidance_scale,
        "bias": bias,
        "step": step,
        "seed": seed,
        "nag_guidance": nag_guidance,
        "nag_alpha": nag_alpha,
        "nag_tau": nag_tau,
        "nag_step": nag_step,
    })
    return output_path, nag_path

with open("sample_prompts.json", "r") as f:
    sample_prompts = json.load(f)

def load_sample():
    sample = np.random.choice(sample_prompts)
    return sample['prompt'], sample['missing_element']

with open("anti_aesthetics.json", "r") as f:
    anti_aesthetics_prompts = json.load(f)

def load_anti_aesthetics_sample():
    sample = np.random.choice(anti_aesthetics_prompts)
    return sample['prompt'], sample['missing_element']

nouns = ["cat", "dog", "car", "bicycle", "tree", "house", "computer", "phone", "book", "chair", "table", "lamp", "flower", "mountain", "river", "ocean", "cloud", "bird", "fish", "butterfly"]
methods = ["painting", "sketch", "drawing"]
def load_abstract_prompt():
    noun = np.random.choice(nouns)
    method = np.random.choice(methods)
    prompt = f"An abstract {method} of a {noun}."
    negative = f"{noun}"
    return prompt, negative
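
# The prompt rephraser below (an OpenRouter/Qwen chat call) is left commented out and unused.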
# def rephrase_prompt(pos_prompt, neg_prompt):
#     completion = client.chat.completions.create(
#     extra_headers={
#         "HTTP-Referer": "<YOUR_SITE_URL>", # Optional. Site URL for rankings on openrouter.ai.
#         "X-Title": "<YOUR_SITE_NAME>", # Optional. Site title for rankings on openrouter.ai.
#     },
#     extra_body={},
#     model="qwen/qwen3-vl-235b-a22b-instruct",
#     messages=[
#         {
#         "role": "user",
#         "content": "Repahrase the following prompt to one sentence for positive prompt and a few words for negative prompt.\n\nOriginal Prompt: {}\n\nNegative Element: {}. \n make sure the generated prompt follows the positive-negative prompt pair, do not mention the negative prompt in positive one".format(pos_prompt, neg_prompt)
#         }
#     ]
#     )




with gr.Blocks(title="Value Sign Flip SD3.5 Demo") as demo:
    gr.Markdown("# Value Sign Flip SD3.5 Demo \n\n This demo is based on SD3.5-L-Turbo model and uses Value Sign Flip technique to generate videos with different guidance scales and biases. More on [GitHub](https://github.com/weathon/VSF/blob/main/wan.md)\n\nPositive prompt should be at least 1 sentence long or the results will be weird. ")
    gr.Markdown("To help with further research, all generations will be logged anonymously. If you do not wish to participate, please do not use the demo. Please keep prompts safe for work and non-offensive. ") 
    gr.Markdown("This project is supported by [Lambda Cloud](https://lambda.ai/), [Weathon Software](https://weasoft.com), and [Canada Foundation for Innovation](https://www.innovation.ca/). ")
    
    # gr.Markdown("# Value Sign Flip Wan 2.1 Demo \n\n This demo is based on Wan 2.1 T2V model and uses Value Sign Flip technique to generate videos with different guidance scales and biases. More on [GitHub](https://github.com/weathon/VSF/blob/main/wan.md)\n\nPositive prompt should be at least 2 sentence long or the results will be weird.")

    with gr.Row(min_height=200):
        pos = gr.Textbox(label="Positive Prompt", value="A polished bicycle frame leans against a weathered brick wall under soft morning light.", lines=5)
        neg = gr.Textbox(label="Negative Prompt", value="wheels", lines=5)
        
        # rephase = gr.Button("Rephrase Prompt") 
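    # Buttons that fill the prompt fields with example positive/negative pairs.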
    with gr.Row(): 
        sample = gr.Button("Load A Sample Prompt")
        sample.click(fn=load_sample, inputs=[], outputs=[pos, neg])
        anti_aesthetic_sample = gr.Button("Load An Anti-Aesthetic Sample Prompt (Could Cause Unease)")
        anti_aesthetic_sample.click(fn=load_anti_aesthetics_sample, inputs=[], outputs=[pos, neg])
        abstract_sample = gr.Button("Load An Abstract Prompt")
        abstract_sample.click(fn=load_abstract_prompt, inputs=[], outputs=[pos, neg])


    with gr.Row():
        gr.Markdown("## VSF Generation Parameters")
        guidance = gr.Slider(0, 5, step=0.1, label="Guidance Scale", value=3.0)
        bias = gr.Slider(0, 0.5, step=0.01, label="Bias", value=0.1)
        step = gr.Slider(4, 15, step=1, label="Step", value=8)
        seed = gr.Number(label="Seed", value=0, precision=0)
        set_strong_vsf = gr.Button("Set to VSF Strong Settings")
        set_strong_vsf.click(fn=lambda : (3.8, 0.2), inputs=[], outputs=[guidance, bias])
        set_mild_vsf = gr.Button("Set to VSF Quality Settings")
        set_mild_vsf.click(fn=lambda : (3.3, 0.2), inputs=[], outputs=[guidance, bias])

    with gr.Row():
        gr.Markdown("## NAG Generation Parameters")
        nag_guidance = gr.Slider(1, 12, step=0.1, label="Guidance Scale", value=5)
        nag_alpha = gr.Slider(0.1, 1.0, step=0.01, label="Alpha", value=0.25)
        nag_tau = gr.Slider(1, 10, step=0.01, label="Tau", value=3.0)
        nag_step = gr.Slider(4, 15, step=1, label="Step", value=8)
        set_strong = gr.Button("Set to NAG Strong Settings")
        set_strong.click(fn=lambda : (11, 0.5, 5.0), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
        set_mild = gr.Button("Set to NAG Quality Settings")
        set_mild.click(fn=lambda : (4, 0.125, 2.5), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])

        

    with gr.Row():
        vsf_out = gr.Image(label="VSF Generated Image")
        nag_out = gr.Image(label="NAG Generated Image")

    btn = gr.Button("Generate")
    btn.click(fn=generate_video, inputs=[pos, neg, guidance, bias, step, seed, nag_guidance, nag_alpha, nag_tau, nag_step], outputs=[vsf_out, nag_out])



demo.launch(share=True)