|
|
# Display name of this extension: used as the script title, the accordion
# label in the UI, and the prefix of the generation-parameter keys.
NAME = 'XL Vec'
|
|
|
|
|
from torch import Tensor, FloatTensor, nn |
|
|
import gradio as gr |
|
|
from modules.processing import StableDiffusionProcessing |
|
|
from modules import scripts |
|
|
|
|
|
from scripts.sdhook import SDHook |
|
|
from scripts.xl_clip import CLIP_SDXL, get_pooled |
|
|
from scripts.xl_vec_xyz import init_xyz |
|
|
|
|
|
|
|
|
|
|
|
def hook_input(
    args: 'Hook',
    mod: nn.Module,
    inputs: tuple[dict[str, Tensor]]
):
    """Overwrite the size / crop / aesthetic conditioning values before the forward pass.

    The first element of ``inputs`` is a dict of conditioning tensors.  For
    every key that is present, the tensor is replaced in place (in the dict)
    by the user-supplied values carried on ``args``, keeping the dtype,
    device and shape of the tensor it replaces.  Each replacement is printed
    as ``key: old -> new``.

    Args:
        args: The active ``Hook``; supplies ``enabled`` and the override values.
        mod: The hooked module; must be a ``CLIP_SDXL`` instance.
        inputs: Positional inputs of the module; ``inputs[0]`` is the
            conditioning dict.

    Returns:
        ``inputs`` (with ``inputs[0]`` mutated) when the hook is enabled,
        ``None`` when it is disabled.
    """
    if not args.enabled:
        return

    assert isinstance(mod, CLIP_SDXL)
    cond = inputs[0]
    # key -> tensor that was there before the overwrite (kept for logging only)
    replaced: dict[str, Tensor] = {}

    def put(name: str, values: list[float]):
        """Replace ``cond[name]`` by ``values``; silently skip absent keys."""
        if name not in cond:
            return
        src = cond[name]
        new = FloatTensor(values).to(dtype=src.dtype, device=src.device)
        if new.numel() == src.numel():
            new = new.reshape(src.shape)
        else:
            # More than one row in the source tensor: repeat the same values
            # for every row instead of failing in reshape().
            new = new.expand(src.shape).contiguous()
        replaced.setdefault(name, src)
        cond[name] = new

    put('original_size_as_tuple', [args.original_height, args.original_width])
    put('crop_coords_top_left', [args.crop_top, args.crop_left])
    put('target_size_as_tuple', [args.target_height, args.target_width])

    score = cond.get('aesthetic_score')
    if score is not None:
        # An incoming score of exactly 6.0 is treated as the positive prompt,
        # anything else as the negative prompt.
        # NOTE(review): 6.0 is presumably the host application's default
        # positive aesthetic score -- confirm against the caller's settings.
        is_positive = score.flatten()[0].item() == 6.0
        put(
            'aesthetic_score',
            [args.aesthetic_score if is_positive else args.negative_aesthetic_score],
        )

    for name, before in replaced.items():
        print(f"{name}: {before.tolist()} -> {cond[name].tolist()}")

    return inputs
|
|
|
|
|
def hook_output(
    args: 'Hook',
    mod: nn.Module,
    inputs: tuple[dict[str, Tensor]],
    output: dict,
):
    """Replace the pooled-text part of ``output['vector']`` after the forward pass.

    Chooses the positive or negative settings from ``args`` depending on
    whether ``inputs[0]['aesthetic_score']`` equals 6.0, obtains a pooled
    embedding from ``get_pooled`` for the selected prompt and token index,
    and writes it (scaled by the multiplier) into the first 1280 columns of
    ``output['vector']``.

    Args:
        args: The active ``Hook``; supplies ``enabled`` and the settings.
        mod: The hooked module; must be a ``CLIP_SDXL`` instance.
        inputs: Positional inputs of the module; ``inputs[0]`` is the
            conditioning dict (``'aesthetic_score'`` and ``'txt'`` are read).
        output: The module output; ``output['vector']`` is modified in place.

    Returns:
        ``output`` when the vector was rewritten; ``None`` when the hook is
        disabled or when there is nothing to change.
    """
    if not args.enabled:
        return

    # NOTE(review): if this dict is the same object hook_input() mutated, the
    # score read here is the overwritten one, not the incoming one -- confirm
    # against SDHook. `.item()` also requires a single-element tensor.
    is_positive = inputs[0]['aesthetic_score'].item() == 6.0
    if is_positive:
        prompt, index, multiplier = (
            args.extra_prompt, args.token_index, args.eot_multiplier,
        )
    else:
        prompt, index, multiplier = (
            args.extra_negative_prompt,
            args.negative_token_index,
            args.negative_eot_multiplier,
        )

    if not prompt:
        # No extra prompt given: with default index and multiplier there is
        # nothing to do, otherwise fall back to the first text of the input.
        if index == -1 and multiplier == 1.0:
            return
        prompt = inputs[0]['txt'][0]

    assert isinstance(mod, CLIP_SDXL)

    # The hook is switched off while get_pooled() runs and switched back on
    # afterwards, even on failure.
    # NOTE(review): presumably because get_pooled() runs `mod` again and
    # would otherwise re-enter these hooks -- confirm in scripts.xl_clip.
    args.enabled = False
    try:
        pooled, at = get_pooled(mod, prompt, index=index)
        assert pooled.shape == (1, 1280), f'pooled.shape={pooled.shape}'
    finally:
        args.enabled = True

    output['vector'][:, :1280] = pooled * multiplier
    print(f"vector[:, 0:1280]: {inputs[0]['txt']} -> {[prompt]} @ {at} [M={multiplier:.3f}]")

    return output
|
|
|
|
|
|
|
|
|
|
|
class Hook(SDHook):
    """SDHook that carries the XL Vec settings and attaches the two CLIP hooks.

    The constructor normalises every UI value to a plain ``float`` / ``int`` /
    ``bool`` and stores it as an attribute of the same name; ``hook_input``
    and ``hook_output`` read those attributes.
    """

    def __init__(
        self,
        enabled: bool,
        p: StableDiffusionProcessing,
        crop_left: float,
        crop_top: float,
        original_width: float,
        original_height: float,
        target_width: float,
        target_height: float,
        aesthetic_score: float,
        negative_aesthetic_score: float,
        extra_prompt: str|None,
        extra_negative_prompt: str|None,
        token_index: int|float,
        negative_token_index: int|float,
        eot_multiplier: float,
        negative_eot_multiplier: float,
        with_hr: bool,
    ):
        """Store the processing object and the type-normalised settings."""
        super().__init__(enabled)
        self.p = p

        # Crop offsets and sizes.
        self.crop_left, self.crop_top = float(crop_left), float(crop_top)
        self.original_width = float(original_width)
        self.original_height = float(original_height)
        self.target_width = float(target_width)
        self.target_height = float(target_height)

        # Aesthetic scores for the positive / negative prompt.
        self.aesthetic_score = float(aesthetic_score)
        self.negative_aesthetic_score = float(negative_aesthetic_score)

        # Replacement prompts are kept as given (may be None or empty).
        self.extra_prompt = extra_prompt
        self.extra_negative_prompt = extra_negative_prompt

        # Token indices may arrive as floats from the UI; truncate to int.
        self.token_index = int(token_index)
        self.negative_token_index = int(negative_token_index)

        self.eot_multiplier = float(eot_multiplier)
        self.negative_eot_multiplier = float(negative_eot_multiplier)

        # Stored only; nothing in this class reads it.
        self.with_hr = bool(with_hr)

    def hook_clip(self, p: StableDiffusionProcessing, clip: nn.Module):
        """Attach ``hook_input`` / ``hook_output`` to ``clip`` for SDXL models only.

        Does nothing when ``p.sd_model`` has no truthy ``is_sdxl`` attribute.
        """
        if not getattr(p.sd_model, 'is_sdxl', False):
            return

        # Bind this Hook instance as the first argument of both callbacks.
        def before_forward(*args, **kwargs):
            return hook_input(self, *args, **kwargs)

        def after_forward(*args, **kwargs):
            return hook_output(self, *args, **kwargs)

        # NOTE(review): hook_layer_pre / hook_layer are inherited from SDHook;
        # presumably they register a forward pre-hook and a forward hook --
        # confirm in scripts.sdhook.
        self.hook_layer_pre(clip, before_forward)
        self.hook_layer(clip, after_forward)
|
|
|
|
|
|
|
|
|
|
|
class Script(scripts.Script):
    """Always-visible web UI script that builds the XL Vec controls and installs the hook."""

    def __init__(self):
        super().__init__()
        # Hook installed by the previous process() call, if any.
        self.last_hooker: SDHook|None = None

    def title(self):
        """Return the display name of the script."""
        return NAME

    def show(self, is_img2img):
        """Show the script in both tabs regardless of ``is_img2img``."""
        return scripts.AlwaysVisible

    def ui(self, is_img2img):
        """Build the controls and return them in the order ``process`` expects."""
        # Keyword sets shared by sliders of the same kind.
        crop_opts = dict(minimum=-512, maximum=512, step=1, value=0)
        size_opts = dict(minimum=-1, maximum=4096, step=1, value=-1)
        score_opts = dict(minimum=0.0, maximum=10.0, step=0.05)
        index_opts = dict(minimum=-77, maximum=76, step=1, value=-1)
        mult_opts = dict(minimum=-4.0, maximum=8.0, step=0.05, value=1.0)

        with gr.Accordion(NAME, open=False):
            # NOTE(review): nesting reconstructed from a source that lost its
            # indentation; only the two checkboxes are assumed to be in the Row.
            with gr.Row():
                enabled = gr.Checkbox(label='Enabled', value=False)
                with_hr = gr.Checkbox(label='Also enable on Hires fix', value=False, visible=False)

            crop_left = gr.Slider(label='Crop Left', **crop_opts)
            crop_top = gr.Slider(label='Crop Top', **crop_opts)

            original_width = gr.Slider(label='Original Width (-1 is original size)', **size_opts)
            original_height = gr.Slider(label='Original Height (-1 is original size)', **size_opts)
            target_width = gr.Slider(label='Target Width (-1 is original size)', **size_opts)
            target_height = gr.Slider(label='Target Height (-1 is original size)', **size_opts)

            aesthetic_score = gr.Slider(value=6.0, label="Aesthetic Score (0..10)", **score_opts)
            negative_aesthetic_score = gr.Slider(value=2.5, label="Negative Aesthetic Score (0..10)", **score_opts)

            extra_prompt = gr.Textbox(lines=3, label='Extra prompt (set empty to be disabled)')
            extra_negative_prompt = gr.Textbox(lines=3, label='Extra negative prompt (set empty to be disabled)')

            token_index = gr.Slider(label='Token index in the prompt for the vector (-1 is first EOT)', **index_opts)
            negative_token_index = gr.Slider(label='Token index in the negative prompt for the vector (-1 is first EOT)', **index_opts)

            eot_multiplier = gr.Slider(label='Token multiplier', **mult_opts)
            negative_eot_multiplier = gr.Slider(label='Negative token multiplier', **mult_opts)

        # Order must match the positional parameters of process() after `p`.
        return [
            enabled,
            crop_left, crop_top,
            original_width, original_height,
            target_width, target_height,
            aesthetic_score, negative_aesthetic_score,
            extra_prompt, extra_negative_prompt,
            token_index, negative_token_index,
            eot_multiplier, negative_eot_multiplier,
            with_hr,
        ]

    def process(
        self,
        p: StableDiffusionProcessing,
        enabled: bool,
        crop_left: float,
        crop_top: float,
        original_width: float,
        original_height: float,
        target_width: float,
        target_height: float,
        aesthetic_score: float,
        negative_aesthetic_score: float,
        extra_prompt: str,
        extra_negative_prompt: str,
        token_index: float,
        negative_token_index: float,
        eot_multiplier: float,
        negative_eot_multiplier: float,
        with_hr: bool,
    ):
        """Remove the previous hook and, if enabled, install a new one for ``p``.

        Negative width/height values are replaced by ``p.width`` / ``p.height``.
        The effective settings are recorded in ``p.extra_generation_params``
        and the ``cached_c`` / ``cached_uc`` attributes of ``p`` are reset
        when present.
        """
        # Always tear down whatever the previous call installed.
        previous = self.last_hooker
        if previous is not None:
            previous.__exit__(None, None, None)
        self.last_hooker = None

        if not enabled:
            return

        # -1 (any negative value) means "use the size of this job".
        if original_width < 0:
            original_width = p.width
        if original_height < 0:
            original_height = p.height
        if target_width < 0:
            target_width = p.width
        if target_height < 0:
            target_height = p.height

        hooker = Hook(
            enabled=True,
            p=p,
            crop_left=crop_left,
            crop_top=crop_top,
            original_width=original_width,
            original_height=original_height,
            target_width=target_width,
            target_height=target_height,
            aesthetic_score=aesthetic_score,
            negative_aesthetic_score=negative_aesthetic_score,
            extra_prompt=extra_prompt,
            extra_negative_prompt=extra_negative_prompt,
            token_index=token_index,
            negative_token_index=negative_token_index,
            eot_multiplier=eot_multiplier,
            negative_eot_multiplier=negative_eot_multiplier,
            with_hr=with_hr,
        )
        self.last_hooker = hooker
        hooker.setup(p)
        hooker.__enter__()

        # Record the effective settings under "[<NAME>] <label>" keys.
        recorded = (
            ('Enabled', enabled),
            ('Crop Left', crop_left),
            ('Crop Top', crop_top),
            ('Original Width', original_width),
            ('Original Height', original_height),
            ('Target Width', target_width),
            ('Target Height', target_height),
            ('Aesthetic Score', aesthetic_score),
            ('Negative Aesthetic Score', negative_aesthetic_score),
            ('Extra Prompt', repr(extra_prompt)),
            ('Extra Negative Prompt', repr(extra_negative_prompt)),
            ('Token Index', token_index),
            ('Negative Token Index', negative_token_index),
            ('EOT Multiplier', eot_multiplier),
            ('Negative EOT Multiplier', negative_eot_multiplier),
        )
        p.extra_generation_params.update(
            {f'[{NAME}] {label}': value for label, value in recorded}
        )

        # Reset the conditioning caches on `p` when it has them.
        # NOTE(review): presumably so that conditioning is recomputed with
        # the hook active -- confirm against modules.processing.
        for cache_attr in ('cached_c', 'cached_uc'):
            if hasattr(p, cache_attr):
                setattr(p, cache_attr, [None, None])
|
|
|
|
|
|
|
|
|
|
|
# Module-level side effect: hand the script class and its display name to
# init_xyz from scripts.xl_vec_xyz.
# NOTE(review): presumably this registers X/Y/Z-plot axes for the script's
# options -- confirm in scripts.xl_vec_xyz.
init_xyz(Script, NAME)
|
|
|