Spaces:

dezzman
/

diffusion_models

Sleeping

App Files Files Community

dezzman committed on Feb 8, 2025

Commit

f3f96df

verified ·

1 Parent(s): aa913a2

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -19

app.py CHANGED Viewed

@@ -1,15 +1,46 @@
 import gradio as gr
 import numpy as np
 import torch
-from diffusers import StableDiffusionPipeline
 from peft import PeftModel, LoraConfig
 import os
 def get_lora_sd_pipeline(
     ckpt_dir='./lora_logos',
     base_model_name_or_path=None,
     dtype=torch.float16,
-    adapter_name="default"
     ):
     unet_sub_dir = os.path.join(ckpt_dir, "unet")
@@ -22,7 +53,12 @@ def get_lora_sd_pipeline(
     if base_model_name_or_path is None:
         raise ValueError("Please specify the base model name or path")
-    pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
     before_params = pipe.unet.parameters()
     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
     pipe.unet.set_adapter(adapter_name)
@@ -35,7 +71,7 @@ def get_lora_sd_pipeline(
     if dtype in (torch.float16, torch.bfloat16):
         pipe.unet.half()
         pipe.text_encoder.half()
     return pipe
 def process_prompt(prompt, tokenizer, text_encoder, max_length=77):
@@ -52,14 +88,36 @@ def align_embeddings(prompt_embeds, negative_prompt_embeds):
     return torch.nn.functional.pad(prompt_embeds, (0, 0, 0, max_length - prompt_embeds.shape[1])), \
            torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model_id_default = "CompVis/stable-diffusion-v1-4"
-torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-pipe_default = get_lora_sd_pipeline(ckpt_dir='./lora_logos', base_model_name_or_path=model_id_default, dtype=torch_dtype).to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
 def infer(
     prompt,
@@ -71,24 +129,59 @@ def infer(
     seed=42,
     guidance_scale=7.0,
     lora_scale=0.5,
     progress=gr.Progress(track_tqdm=True)
     ):
     generator = torch.Generator(device).manual_seed(seed)
-    if model_id != model_id_default:
-        pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
-        prompt_embeds = process_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
-        negative_prompt_embeds = process_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
-        prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
     else:
-        pipe = pipe_default
-        prompt_embeds = process_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
-        negative_prompt_embeds = process_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
-        prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
         print(f"LoRA adapter loaded: {pipe.unet.active_adapters}")
         print(f"LoRA scale applied: {lora_scale}")
         pipe.fuse_lora(lora_scale=lora_scale)
     params = {
         'prompt_embeds': prompt_embeds,
@@ -99,6 +192,23 @@ def infer(
         'height': height,
         'generator': generator,
     }
     return pipe(**params).images[0]
@@ -169,6 +279,36 @@ with gr.Blocks(css=css) as demo:
                 value=20,
             )
         with gr.Accordion("Optional Settings", open=False):
             with gr.Row():
                 width = gr.Slider(
@@ -204,6 +344,13 @@ with gr.Blocks(css=css) as demo:
             seed,
             guidance_scale,
             lora_scale,
         ],
         outputs=[result],
     )

 import gradio as gr
 import numpy as np
 import torch
+from diffusers import (
+    StableDiffusionPipeline,
+    StableDiffusionControlNetPipeline,
+    ControlNetModel
+)
 from peft import PeftModel, LoraConfig
 import os
+MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 1024
+IP_ADAPTER = 'h94/IP-Adapter'
+IP_ADAPTER_WEIGHT_NAME = "ip-adapter-plus_sd15.bin"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_id_default = "CompVis/stable-diffusion-v1-4"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+hed = None
+dict_controlnet = {
+    "edge_detection": "lllyasviel/sd-controlnet-canny",
+    # "pose_estimation": "lllyasviel/sd-controlnet-openpose",
+    # "depth_map": "lllyasviel/sd-controlnet-depth",
+    "scribble": "lllyasviel/sd-controlnet-scribble",
+    # "MLSD": "lllyasviel/sd-controlnet-mlsd"
+}
+controlnet = ControlNetModel.from_pretrained(
+    dict_controlnet["edge_detection"],
+    cache_dir="./models_cache",
+    torch_dtype=torch_dtype,
+)
 def get_lora_sd_pipeline(
     ckpt_dir='./lora_logos',
     base_model_name_or_path=None,
     dtype=torch.float16,
+    adapter_name="default",
+    controlnet
     ):
     unet_sub_dir = os.path.join(ckpt_dir, "unet")
     if base_model_name_or_path is None:
         raise ValueError("Please specify the base model name or path")
+    pipe = StableDiffusionControlNetPipeline.from_pretrained(
+        base_model_name_or_path,
+        torch_dtype=dtype,
+        controlnet=controlnet,
+    )
     before_params = pipe.unet.parameters()
     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
     pipe.unet.set_adapter(adapter_name)
     if dtype in (torch.float16, torch.bfloat16):
         pipe.unet.half()
         pipe.text_encoder.half()
     return pipe
 def process_prompt(prompt, tokenizer, text_encoder, max_length=77):
     return torch.nn.functional.pad(prompt_embeds, (0, 0, 0, max_length - prompt_embeds.shape[1])), \
            torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))
+def map_edge_detection(image_path: str) -> Image:
+    source_img = load_image(image_path).convert('RGB')
+    edges = cv.Canny(np.array(source_img), 80, 160)
+    edges = np.repeat(edges[:, :, None], 3, axis=2)
+    final_image = Image.fromarray(edges)
+    return final_image
+def map_scribble(image_path: str) -> Image:
+    global hed
+    if not hed:
+        hed = HEDdetector.from_pretrained('lllyasviel/Annotators')
+    image = load_image(image_path).convert('RGB')
+    scribble_image = hed(image)
+    image_np = np.array(scribble_image)
+    image_np = cv.medianBlur(image_np, 3)
+    image = cv.convertScaleAbs(image_np, alpha=1.5, beta=0)
+    final_image = Image.fromarray(image)
+    return final_image
+pipe = get_lora_sd_pipeline(
+    ckpt_dir='./lora_logos',
+    base_model_name_or_path=model_id_default,
+    dtype=torch_dtype,
+    controlnet=controlnet
+).to(device)
 def infer(
     prompt,
     seed=42,
     guidance_scale=7.0,
     lora_scale=0.5,
+    cn_enable=False,
+    cn_strength=0.0,
+    cn_mode='edge_detection',
+    cn_image=None,
+    ip_enable=False,
+    ip_scale=0.5,
+    ip_image=None,
     progress=gr.Progress(track_tqdm=True)
     ):
     generator = torch.Generator(device).manual_seed(seed)
+    global pipe
+    global controlnet
+    controlnet_changed = False
+    if cn_enable:
+        if dict_controlnet[cn_mode] != pipe.controlnet._name_or_path:
+            controlnet = ControlNetModel.from_pretrained(
+                dict_controlnet[cn_mode],
+                cache_dir="./models_cache",
+                torch_dtype=torch_dtype
+            )
+            controlnet_changed = True
     else:
+        cn_strength = 0.0  # force-disable ControlNet
+    if model_id != pipe._name_or_path:
+        pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            model_id,
+            torch_dtype=torch_dtype,
+            controlnet=controlnet,
+            controlnet_conditioning_scale=cn_strength,
+        ).to(device)
+    elif (model_id == pipe._name_or_path) and controlnet_changed:
+        pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            model_id,
+            torch_dtype=torch_dtype,
+            controlnet=controlnet,
+            controlnet_conditioning_scale=cn_strength,
+        ).to(device)
+        print(f"LoRA adapter loaded: {pipe.unet.active_adapters}")
+        print(f"LoRA scale applied: {lora_scale}")
+        pipe.fuse_lora(lora_scale=lora_scale)
+    elif (model_id == pipe._name_or_path) and not controlnet_changed:
         print(f"LoRA adapter loaded: {pipe.unet.active_adapters}")
         print(f"LoRA scale applied: {lora_scale}")
         pipe.fuse_lora(lora_scale=lora_scale)
+    prompt_embeds = process_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
+    negative_prompt_embeds = process_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
+    prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
     params = {
         'prompt_embeds': prompt_embeds,
         'height': height,
         'generator': generator,
     }
+    if cn_enable:
+        params['controlnet_conditioning_scale'] = cn_strength
+        if cn_mode == 'edge_detection':
+            control_image = map_edge_detection(cn_image)
+        elif cn_mode == 'scribble':
+            control_image = map_scribble(cn_image)
+        params['control_image'] = control_image
+    if ip_enable:
+        pipe.load_ip_adapter(
+            IP_ADAPTER,
+            subfolder="models",
+            weight_name=IP_ADAPTER_WEIGHT_NAME,
+        )
+        params['ip_adapter_image'] = load_image(ip_image).convert('RGB')
+        pipe.ip_scale(0.6)
     return pipe(**params).images[0]
                 value=20,
             )
+        # ControlNet section
+        cn_enable = gr.Checkbox(label="Enable ControlNet")
+        with gr.Column(visible=False) as cn_options:
+            with gr.Row():
+                cn_strength = gr.Slider(0, 2, value=0.8, step=0.1, label="Control strength", interactive=True)
+                cn_mode = gr.Dropdown(
+                    choices=["edge_detection", "scribble"],
+                    label="Work regime",
+                    interactive=True,
+                )
+            cn_image = gr.Image(type="filepath", label="Control image")
+        cn_enable.change(
+            lambda x: gr.update(visible=x),
+            inputs=cn_enable,
+            outputs=cn_options
+        )
+        # IP-Adapter section
+        ip_enable = gr.Checkbox(label="Enable IP-Adapter")
+        with gr.Column(visible=False) as ip_options:
+            ip_scale = gr.Slider(0, 1, value=0.5, step=0.1, label="IP-adapter scale", interactive=True)
+            ip_image = gr.Image(type="filepath", label="IP-adapter image", interactive=True)
+        ip_enable.change(
+            lambda x: gr.update(visible=x),
+            inputs=ip_enable,
+            outputs=ip_options
+        )
         with gr.Accordion("Optional Settings", open=False):
             with gr.Row():
                 width = gr.Slider(
             seed,
             guidance_scale,
             lora_scale,
+            cn_enable,
+            cn_strength,
+            cn_mode,
+            cn_image,
+            ip_enable,
+            ip_scale,
+            ip_image
         ],
         outputs=[result],
     )