Spaces:
Paused
Paused
Upload 37 files
Browse files- .gitattributes +2 -0
- html/circular.html +32 -0
- html/denoising.html +16 -0
- html/embeddings.html +75 -0
- html/guidance.html +17 -0
- html/inpainting.html +14 -0
- html/interpolate.html +24 -0
- html/negative.html +15 -0
- html/perturbations.html +35 -0
- html/poke.html +21 -0
- html/seeds.html +25 -0
- images/circular.gif +3 -0
- images/circular.png +0 -0
- images/denoising.png +0 -0
- images/guidance.png +0 -0
- images/inpainting.png +0 -0
- images/interpolate.gif +3 -0
- images/interpolate.png +0 -0
- images/negative.png +0 -0
- images/perturbations.png +0 -0
- images/poke.png +0 -0
- images/seeds.png +0 -0
- run.py +1029 -0
- src/__init__.py +2 -0
- src/pipelines/__init__.py +9 -0
- src/pipelines/circular.py +52 -0
- src/pipelines/embeddings.py +196 -0
- src/pipelines/guidance.py +39 -0
- src/pipelines/inpainting.py +41 -0
- src/pipelines/interpolate.py +51 -0
- src/pipelines/negative.py +37 -0
- src/pipelines/perturbations.py +62 -0
- src/pipelines/poke.py +83 -0
- src/pipelines/seed.py +32 -0
- src/util/__init__.py +3 -0
- src/util/base.py +304 -0
- src/util/clip_config.py +114 -0
- src/util/params.py +96 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
images/circular.gif filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
images/interpolate.gif filter=lfs diff=lfs merge=lfs -text
|
html/circular.html
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
This tab generates a circular trajectory through latent space that begins and ends with the same image.
|
| 9 |
+
If we specify a large number of steps around the circle, the successive images will be closely related, resulting in a gradual deformation that produces a nice animation.
|
| 10 |
+
</p>
|
| 11 |
+
<p style="font-weight: bold;">
|
| 12 |
+
Additional Controls:
|
| 13 |
+
</p>
|
| 14 |
+
<p style="font-weight: bold;">
|
| 15 |
+
Number of Steps around the Circle:
|
| 16 |
+
</p>
|
| 17 |
+
<p>
|
| 18 |
+
Specify the number of images to produce along the circular path.
|
| 19 |
+
</p>
|
| 20 |
+
<p style="font-weight: bold;">
|
| 21 |
+
Proportion of Circle:
|
| 22 |
+
</p>
|
| 23 |
+
<p>
|
| 24 |
+
Sets the proportion of the circle to cover during image generation.
|
| 25 |
+
Ranges from 0 to 360 degrees.
|
| 26 |
+
Using a high step count with a small number of degrees allows you to explore very subtle image transformations.
|
| 27 |
+
</p>
|
| 28 |
+
</div>
|
| 29 |
+
<div style="flex: 1; align-content: center;">
|
| 30 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/circular.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 31 |
+
</div>
|
| 32 |
+
</div>
|
html/denoising.html
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
This tab displays the intermediate images generated during the denoising process.
|
| 9 |
+
Seeing these intermediate images provides insight into how the diffusion model progressively adds detail at each step.
|
| 10 |
+
</p>
|
| 11 |
+
</div>
|
| 12 |
+
<div style="flex: 1; align-content: center;">
|
| 13 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/denoising.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 14 |
+
</div>
|
| 15 |
+
</div>
|
| 16 |
+
</details>
|
html/embeddings.html
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<head>
|
| 2 |
+
<link rel="stylesheet" type="text/css" href="styles.css">
|
| 3 |
+
</head>
|
| 4 |
+
|
| 5 |
+
<details open>
|
| 6 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 7 |
+
About
|
| 8 |
+
</summary>
|
| 9 |
+
<div style="background-color: #D87F2B; padding-left: 10px;">
|
| 10 |
+
<p style="font-weight: bold;">
|
| 11 |
+
Basic Exploration
|
| 12 |
+
</p>
|
| 13 |
+
The top part of the embeddings tab is the 3D plot of semantic feature space.
|
| 14 |
+
At the bottom of the tab there are expandable panels that can be opened to reveal more advanced features
|
| 15 |
+
|
| 16 |
+
<ul>
|
| 17 |
+
<li>
|
| 18 |
+
<strong>
|
| 19 |
+
Explore the 3D semantic feature space:
|
| 20 |
+
</strong>
|
| 21 |
+
Click and drag in the 3D semantic feature space to rotate the view.
|
| 22 |
+
Use the scroll wheel to zoom in and out.
|
| 23 |
+
Hold down the control key and click and drag to pan the view.
|
| 24 |
+
</li>
|
| 25 |
+
<li>
|
| 26 |
+
<strong>
|
| 27 |
+
Find the generated image:
|
| 28 |
+
</strong>
|
| 29 |
+
Hover over a point in the semantic feature space, and a window will pop up showing a generated image from this one-word prompt.
|
| 30 |
+
On left click, the image will be downloaded.
|
| 31 |
+
</li>
|
| 32 |
+
<li>
|
| 33 |
+
<strong>
|
| 34 |
+
Find the embedding vector display:
|
| 35 |
+
</strong>
|
| 36 |
+
Hover over a word in the 3D semantic feature space, and an embedding vector display at the bottom of the tab shows the corresponding embedding vector.
|
| 37 |
+
</li>
|
| 38 |
+
<li>
|
| 39 |
+
<strong>
|
| 40 |
+
Add/remove words from the 3D plot:
|
| 41 |
+
</strong>
|
| 42 |
+
Type a word in the Add/Remove word text box below the 3D plot to add a word to the plot, or if the word is already present, remove it from the plot.
|
| 43 |
+
You can also type multiple words separated by spaces or commas.
|
| 44 |
+
</li>
|
| 45 |
+
<li>
|
| 46 |
+
<strong>
|
| 47 |
+
Change image for word in the 3D plot:
|
| 48 |
+
</strong>
|
| 49 |
+
Type a word in the Change image for word text box below the 3D plot to generate a new image for the corresponding word in the plot.
|
| 50 |
+
</li>
|
| 51 |
+
</ul>
|
| 52 |
+
|
| 53 |
+
<p style="font-weight: bold; margin-top: 10px;">
|
| 54 |
+
Semantic Dimensions
|
| 55 |
+
</p>
|
| 56 |
+
<ul>
|
| 57 |
+
<li>
|
| 58 |
+
<strong>Select a different semantic dimension.</strong><br>
|
| 59 |
+
Open the Custom Semantic Dimensions panel and choose another dimension for the X or Y or Z axis.
|
| 60 |
+
See how the display changes.
|
| 61 |
+
</li>
|
| 62 |
+
<li>
|
| 63 |
+
<strong>Alter a semantic dimension.</strong><br>
|
| 64 |
+
Examine the positive and negative word pairs used to define the semantic dimension.
|
| 65 |
+
You can change these pairs to alter the semantic dimension.
|
| 66 |
+
</li>
|
| 67 |
+
<li>
|
| 68 |
+
<strong>Define a new semantic dimension.</strong><br>
|
| 69 |
+
Pick a new semantic dimension that you can define using pairs of opposed words.
|
| 70 |
+
For example, you could define a "tense" dimension with pairs such as eat/ate, go/went, see/saw, and is/was to contrast present and past tense forms of verbs.
|
| 71 |
+
</li>
|
| 72 |
+
</ul>
|
| 73 |
+
</div>
|
| 74 |
+
</details>
|
| 75 |
+
|
html/guidance.html
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
Guidance is responsible for making the target image adhere to the prompt.
|
| 9 |
+
A higher value enforces this relation, whereas a lower value does not.
|
| 10 |
+
For example, a guidance scale of 1 produces a distorted grayscale image, whereas 50 produces a distorted, oversaturated image.
|
| 11 |
+
The default value of 8 produces normal-looking images that reasonably adhere to the prompt.
|
| 12 |
+
</p>
|
| 13 |
+
</div>
|
| 14 |
+
<div style="flex: 1; align-content: center;">
|
| 15 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/guidance.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 16 |
+
</div>
|
| 17 |
+
</div>
|
html/inpainting.html
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
Unlike poke, which globally alters the target image via a perturbation in the initial latent noise, inpainting alters just the region of the perturbation and allows us to specify the change we want to make.
|
| 9 |
+
</p>
|
| 10 |
+
</div>
|
| 11 |
+
<div style="flex: 1; align-content: center;">
|
| 12 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/inpainting.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 13 |
+
</div>
|
| 14 |
+
</div>
|
html/interpolate.html
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
This tab generates noise patterns for two text prompts and then interpolates between them, gradually transforming from the first to the second.
|
| 9 |
+
With a large number of perturbation steps the transformation is very gradual and makes a nice animation.
|
| 10 |
+
</p>
|
| 11 |
+
<p style="font-weight: bold;">
|
| 12 |
+
Additional Controls:
|
| 13 |
+
</p>
|
| 14 |
+
<p style="font-weight: bold;">
|
| 15 |
+
Number of Interpolation Steps:
|
| 16 |
+
</p>
|
| 17 |
+
<p>
|
| 18 |
+
Defines the number of intermediate images to generate between the two prompts.
|
| 19 |
+
</p>
|
| 20 |
+
</div>
|
| 21 |
+
<div style="flex: 1; align-content: center;">
|
| 22 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/interpolate.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
html/negative.html
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
Negative prompts steer images away from unwanted features.
|
| 9 |
+
For example, “red” as a negative prompt makes the generated image unlikely to have reddish hues.
|
| 10 |
+
</p>
|
| 11 |
+
</div>
|
| 12 |
+
<div style="flex: 1; align-content: center;">
|
| 13 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/negative.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 14 |
+
</div>
|
| 15 |
+
</div>
|
html/perturbations.html
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
Perturbations enables the exploration of the latent space around a seed.
|
| 9 |
+
Perturbing the noise from an initial seed towards the noise from a different seed illustrates the variations in images obtainable from a local region of latent space.
|
| 10 |
+
Using a small perturbation size produces target images that closely resemble the one from the initial seed.
|
| 11 |
+
Larger perturbations traverse more distance in latent space towards the second seed, resulting in greater variation in the generated images.
|
| 12 |
+
</p>
|
| 13 |
+
<p style="font-weight: bold;">
|
| 14 |
+
Additional Controls:
|
| 15 |
+
</p>
|
| 16 |
+
<p style="font-weight: bold;">
|
| 17 |
+
Number of Perturbations:
|
| 18 |
+
</p>
|
| 19 |
+
<p>
|
| 20 |
+
Specify the number of perturbations to create, i.e., the number of seeds to use. More perturbations produce more images.
|
| 21 |
+
</p>
|
| 22 |
+
<p style="font-weight: bold;">
|
| 23 |
+
Perturbation Size:
|
| 24 |
+
</p>
|
| 25 |
+
<p>
|
| 26 |
+
Controls the perturbation magnitude, ranging from 0 to 1.
|
| 27 |
+
With a value of 0, all images will match the one from the initial seed.
|
| 28 |
+
With a value of 1, images will have no connection to the initial seed.
|
| 29 |
+
A value such as 0.1 is recommended.
|
| 30 |
+
</p>
|
| 31 |
+
</div>
|
| 32 |
+
<div style="flex: 1; align-content: center;">
|
| 33 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/perturbations.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 34 |
+
</div>
|
| 35 |
+
</div>
|
html/poke.html
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
Poke explores how perturbations in a local region of the initial latent noise impact the target image.
|
| 9 |
+
A small perturbation to the initial latent noise gets carried through the denoising process, demonstrating the global effect it can produce.
|
| 10 |
+
</p>
|
| 11 |
+
<p style="font-weight: bold;">
|
| 12 |
+
Additional Controls:
|
| 13 |
+
</p>
|
| 14 |
+
<p>
|
| 15 |
+
You can adjust the perturbation through the X, Y, height, and width controls.
|
| 16 |
+
</p>
|
| 17 |
+
</div>
|
| 18 |
+
<div style="flex: 1; align-content: center;">
|
| 19 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/poke.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 20 |
+
</div>
|
| 21 |
+
</div>
|
html/seeds.html
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<details open>
|
| 2 |
+
<summary style="background-color: #CE6400; padding-left: 10px;">
|
| 3 |
+
About
|
| 4 |
+
</summary>
|
| 5 |
+
<div style="display: flex; flex-direction: row; background-color: #D87F2B; padding-left: 10px;">
|
| 6 |
+
<div style="flex: 1;">
|
| 7 |
+
<p style="margin-top: 10px">
|
| 8 |
+
Seeds create the initial noise that gets refined into the target image.
|
| 9 |
+
Different seeds produce different noise patterns, hence the target image will differ even when prompted by the same text.
|
| 10 |
+
This tab produces multiple target images from the same text prompt to showcase how changing the seed changes the target image.
|
| 11 |
+
</p>
|
| 12 |
+
<p style="font-weight: bold;">
|
| 13 |
+
Additional Controls:
|
| 14 |
+
</p>
|
| 15 |
+
<p style="font-weight: bold;">
|
| 16 |
+
Number of Seeds:
|
| 17 |
+
</p>
|
| 18 |
+
<p>
|
| 19 |
+
Specify how many seed values to use.
|
| 20 |
+
</p>
|
| 21 |
+
</div>
|
| 22 |
+
<div style="flex: 1; align-content: center;">
|
| 23 |
+
<img src="https://raw.githubusercontent.com/touretzkyds/DiffusionDemo/master/images/seeds.png" style="max-width: 100%; height: auto; margin-top: 10px; margin-bottom: 10px; padding-left: 10px;">
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
images/circular.gif
ADDED
|
Git LFS Details
|
images/circular.png
ADDED
|
images/denoising.png
ADDED
|
images/guidance.png
ADDED
|
images/inpainting.png
ADDED
|
images/interpolate.gif
ADDED
|
Git LFS Details
|
images/interpolate.png
ADDED
|
images/negative.png
ADDED
|
images/perturbations.png
ADDED
|
images/poke.png
ADDED
|
images/seeds.png
ADDED
|
run.py
ADDED
|
@@ -0,0 +1,1029 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from PIL import Image
|
| 4 |
+
from src.util import *
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
from src.pipelines import *
|
| 7 |
+
from threading import Thread
|
| 8 |
+
from dash import Dash, dcc, html, Input, Output, no_update, callback
|
| 9 |
+
|
| 10 |
+
app = Dash(__name__)
|
| 11 |
+
|
| 12 |
+
app.layout = html.Div(
|
| 13 |
+
className="container",
|
| 14 |
+
children=[
|
| 15 |
+
dcc.Graph(
|
| 16 |
+
id="graph", figure=fig, clear_on_unhover=True, style={"height": "90vh"}
|
| 17 |
+
),
|
| 18 |
+
dcc.Tooltip(id="tooltip"),
|
| 19 |
+
html.Div(id="word-emb-txt", style={"background-color": "white"}),
|
| 20 |
+
html.Div(id="word-emb-vis"),
|
| 21 |
+
html.Div(
|
| 22 |
+
[
|
| 23 |
+
html.Button(id="btn-download-image", hidden=True),
|
| 24 |
+
dcc.Download(id="download-image"),
|
| 25 |
+
]
|
| 26 |
+
),
|
| 27 |
+
],
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@callback(
|
| 32 |
+
Output("tooltip", "show"),
|
| 33 |
+
Output("tooltip", "bbox"),
|
| 34 |
+
Output("tooltip", "children"),
|
| 35 |
+
Output("tooltip", "direction"),
|
| 36 |
+
Output("word-emb-txt", "children"),
|
| 37 |
+
Output("word-emb-vis", "children"),
|
| 38 |
+
Input("graph", "hoverData"),
|
| 39 |
+
)
|
| 40 |
+
def display_hover(hoverData):
|
| 41 |
+
if hoverData is None:
|
| 42 |
+
return False, no_update, no_update, no_update, no_update, no_update
|
| 43 |
+
|
| 44 |
+
hover_data = hoverData["points"][0]
|
| 45 |
+
bbox = hover_data["bbox"]
|
| 46 |
+
direction = "left"
|
| 47 |
+
index = hover_data["pointNumber"]
|
| 48 |
+
|
| 49 |
+
children = [
|
| 50 |
+
html.Img(
|
| 51 |
+
src=images[index],
|
| 52 |
+
style={"width": "250px"},
|
| 53 |
+
),
|
| 54 |
+
html.P(
|
| 55 |
+
hover_data["text"],
|
| 56 |
+
style={
|
| 57 |
+
"color": "black",
|
| 58 |
+
"font-size": "20px",
|
| 59 |
+
"text-align": "center",
|
| 60 |
+
"background-color": "white",
|
| 61 |
+
"margin": "5px",
|
| 62 |
+
},
|
| 63 |
+
),
|
| 64 |
+
]
|
| 65 |
+
|
| 66 |
+
emb_children = [
|
| 67 |
+
html.Img(
|
| 68 |
+
src=generate_word_emb_vis(hover_data["text"]),
|
| 69 |
+
style={"width": "100%", "height": "25px"},
|
| 70 |
+
),
|
| 71 |
+
]
|
| 72 |
+
|
| 73 |
+
return True, bbox, children, direction, hover_data["text"], emb_children
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@callback(
|
| 77 |
+
Output("download-image", "data"),
|
| 78 |
+
Input("graph", "clickData"),
|
| 79 |
+
)
|
| 80 |
+
def download_image(clickData):
|
| 81 |
+
|
| 82 |
+
if clickData is None:
|
| 83 |
+
return no_update
|
| 84 |
+
|
| 85 |
+
click_data = clickData["points"][0]
|
| 86 |
+
index = click_data["pointNumber"]
|
| 87 |
+
txt = click_data["text"]
|
| 88 |
+
|
| 89 |
+
img_encoded = images[index]
|
| 90 |
+
img_decoded = base64.b64decode(img_encoded.split(",")[1])
|
| 91 |
+
img = Image.open(BytesIO(img_decoded))
|
| 92 |
+
img.save(f"{txt}.png")
|
| 93 |
+
return dcc.send_file(f"{txt}.png")
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
with gr.Blocks() as demo:
|
| 97 |
+
gr.Markdown("## Stable Diffusion Demo")
|
| 98 |
+
|
| 99 |
+
with gr.Tab("Latent Space"):
|
| 100 |
+
|
| 101 |
+
with gr.TabItem("Denoising"):
|
| 102 |
+
gr.Markdown("Observe the intermediate images during denoising.")
|
| 103 |
+
gr.HTML(read_html("DiffusionDemo/html/denoising.html"))
|
| 104 |
+
|
| 105 |
+
with gr.Row():
|
| 106 |
+
with gr.Column():
|
| 107 |
+
prompt_denoise = gr.Textbox(
|
| 108 |
+
lines=1,
|
| 109 |
+
label="Prompt",
|
| 110 |
+
value="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
|
| 111 |
+
)
|
| 112 |
+
num_inference_steps_denoise = gr.Slider(
|
| 113 |
+
minimum=2,
|
| 114 |
+
maximum=100,
|
| 115 |
+
step=1,
|
| 116 |
+
value=8,
|
| 117 |
+
label="Number of Inference Steps",
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
with gr.Row():
|
| 121 |
+
seed_denoise = gr.Slider(
|
| 122 |
+
minimum=0, maximum=100, step=1, value=14, label="Seed"
|
| 123 |
+
)
|
| 124 |
+
seed_vis_denoise = gr.Plot(
|
| 125 |
+
value=generate_seed_vis(14), label="Seed"
|
| 126 |
+
)
|
| 127 |
+
|
| 128 |
+
generate_images_button_denoise = gr.Button("Generate Images")
|
| 129 |
+
|
| 130 |
+
with gr.Column():
|
| 131 |
+
images_output_denoise = gr.Gallery(label="Images", selected_index=0)
|
| 132 |
+
gif_denoise = gr.Image(label="GIF")
|
| 133 |
+
zip_output_denoise = gr.File(label="Download ZIP")
|
| 134 |
+
|
| 135 |
+
@generate_images_button_denoise.click(
|
| 136 |
+
inputs=[prompt_denoise, seed_denoise, num_inference_steps_denoise],
|
| 137 |
+
outputs=[images_output_denoise, gif_denoise, zip_output_denoise],
|
| 138 |
+
)
|
| 139 |
+
def generate_images_wrapper(
|
| 140 |
+
prompt, seed, num_inference_steps, progress=gr.Progress()
|
| 141 |
+
):
|
| 142 |
+
images, _ = display_poke_images(
|
| 143 |
+
prompt, seed, num_inference_steps, poke=False, intermediate=True
|
| 144 |
+
)
|
| 145 |
+
fname = "denoising"
|
| 146 |
+
tab_config = {
|
| 147 |
+
"Tab": "Denoising",
|
| 148 |
+
"Prompt": prompt,
|
| 149 |
+
"Number of Inference Steps": num_inference_steps,
|
| 150 |
+
"Seed": seed,
|
| 151 |
+
}
|
| 152 |
+
export_as_zip(images, fname, tab_config)
|
| 153 |
+
progress(1, desc="Exporting as gif")
|
| 154 |
+
export_as_gif(images, filename="denoising.gif")
|
| 155 |
+
return images, "outputs/denoising.gif", f"outputs/{fname}.zip"
|
| 156 |
+
|
| 157 |
+
seed_denoise.change(
|
| 158 |
+
fn=generate_seed_vis, inputs=[seed_denoise], outputs=[seed_vis_denoise]
|
| 159 |
+
)
|
| 160 |
+
|
| 161 |
+
with gr.TabItem("Seeds"):
|
| 162 |
+
gr.Markdown(
|
| 163 |
+
"Understand how different starting points in latent space can lead to different images."
|
| 164 |
+
)
|
| 165 |
+
gr.HTML(read_html("DiffusionDemo/html/seeds.html"))
|
| 166 |
+
|
| 167 |
+
with gr.Row():
|
| 168 |
+
with gr.Column():
|
| 169 |
+
prompt_seed = gr.Textbox(
|
| 170 |
+
lines=1,
|
| 171 |
+
label="Prompt",
|
| 172 |
+
value="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
|
| 173 |
+
)
|
| 174 |
+
num_images_seed = gr.Slider(
|
| 175 |
+
minimum=1, maximum=100, step=1, value=5, label="Number of Seeds"
|
| 176 |
+
)
|
| 177 |
+
num_inference_steps_seed = gr.Slider(
|
| 178 |
+
minimum=2,
|
| 179 |
+
maximum=100,
|
| 180 |
+
step=1,
|
| 181 |
+
value=8,
|
| 182 |
+
label="Number of Inference Steps per Image",
|
| 183 |
+
)
|
| 184 |
+
generate_images_button_seed = gr.Button("Generate Images")
|
| 185 |
+
|
| 186 |
+
with gr.Column():
|
| 187 |
+
images_output_seed = gr.Gallery(label="Images", selected_index=0)
|
| 188 |
+
zip_output_seed = gr.File(label="Download ZIP")
|
| 189 |
+
|
| 190 |
+
generate_images_button_seed.click(
|
| 191 |
+
fn=display_seed_images,
|
| 192 |
+
inputs=[prompt_seed, num_inference_steps_seed, num_images_seed],
|
| 193 |
+
outputs=[images_output_seed, zip_output_seed],
|
| 194 |
+
)
|
| 195 |
+
|
| 196 |
+
with gr.TabItem("Perturbations"):
|
| 197 |
+
gr.Markdown("Explore different perturbations from a point in latent space.")
|
| 198 |
+
gr.HTML(read_html("DiffusionDemo/html/perturbations.html"))
|
| 199 |
+
|
| 200 |
+
with gr.Row():
|
| 201 |
+
with gr.Column():
|
| 202 |
+
prompt_perturb = gr.Textbox(
|
| 203 |
+
lines=1,
|
| 204 |
+
label="Prompt",
|
| 205 |
+
value="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
|
| 206 |
+
)
|
| 207 |
+
num_images_perturb = gr.Slider(
|
| 208 |
+
minimum=0,
|
| 209 |
+
maximum=100,
|
| 210 |
+
step=1,
|
| 211 |
+
value=5,
|
| 212 |
+
label="Number of Perturbations",
|
| 213 |
+
)
|
| 214 |
+
perturbation_size_perturb = gr.Slider(
|
| 215 |
+
minimum=0,
|
| 216 |
+
maximum=1,
|
| 217 |
+
step=0.1,
|
| 218 |
+
value=0.1,
|
| 219 |
+
label="Perturbation Size",
|
| 220 |
+
)
|
| 221 |
+
num_inference_steps_perturb = gr.Slider(
|
| 222 |
+
minimum=2,
|
| 223 |
+
maximum=100,
|
| 224 |
+
step=1,
|
| 225 |
+
value=8,
|
| 226 |
+
label="Number of Inference Steps per Image",
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
with gr.Row():
|
| 230 |
+
seed_perturb = gr.Slider(
|
| 231 |
+
minimum=0, maximum=100, step=1, value=14, label="Seed"
|
| 232 |
+
)
|
| 233 |
+
seed_vis_perturb = gr.Plot(
|
| 234 |
+
value=generate_seed_vis(14), label="Seed"
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
generate_images_button_perturb = gr.Button("Generate Images")
|
| 238 |
+
|
| 239 |
+
with gr.Column():
|
| 240 |
+
images_output_perturb = gr.Gallery(label="Image", selected_index=0)
|
| 241 |
+
zip_output_perturb = gr.File(label="Download ZIP")
|
| 242 |
+
|
| 243 |
+
generate_images_button_perturb.click(
|
| 244 |
+
fn=display_perturb_images,
|
| 245 |
+
inputs=[
|
| 246 |
+
prompt_perturb,
|
| 247 |
+
seed_perturb,
|
| 248 |
+
num_inference_steps_perturb,
|
| 249 |
+
num_images_perturb,
|
| 250 |
+
perturbation_size_perturb,
|
| 251 |
+
],
|
| 252 |
+
outputs=[images_output_perturb, zip_output_perturb],
|
| 253 |
+
)
|
| 254 |
+
seed_perturb.change(
|
| 255 |
+
fn=generate_seed_vis, inputs=[seed_perturb], outputs=[seed_vis_perturb]
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
+
with gr.TabItem("Circular"):
|
| 259 |
+
gr.Markdown(
|
| 260 |
+
"Generate a circular path in latent space and observe how the images vary along the path."
|
| 261 |
+
)
|
| 262 |
+
gr.HTML(read_html("DiffusionDemo/html/circular.html"))
|
| 263 |
+
|
| 264 |
+
with gr.Row():
|
| 265 |
+
with gr.Column():
|
| 266 |
+
prompt_circular = gr.Textbox(
|
| 267 |
+
lines=1,
|
| 268 |
+
label="Prompt",
|
| 269 |
+
value="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
|
| 270 |
+
)
|
| 271 |
+
num_images_circular = gr.Slider(
|
| 272 |
+
minimum=2,
|
| 273 |
+
maximum=100,
|
| 274 |
+
step=1,
|
| 275 |
+
value=5,
|
| 276 |
+
label="Number of Steps around the Circle",
|
| 277 |
+
)
|
| 278 |
+
|
| 279 |
+
with gr.Row():
|
| 280 |
+
degree_circular = gr.Slider(
|
| 281 |
+
minimum=0,
|
| 282 |
+
maximum=360,
|
| 283 |
+
step=1,
|
| 284 |
+
value=360,
|
| 285 |
+
label="Proportion of Circle",
|
| 286 |
+
info="Enter the value in degrees",
|
| 287 |
+
)
|
| 288 |
+
step_size_circular = gr.Textbox(
|
| 289 |
+
label="Step Size", value=360 / 5
|
| 290 |
+
)
|
| 291 |
+
|
| 292 |
+
num_inference_steps_circular = gr.Slider(
|
| 293 |
+
minimum=2,
|
| 294 |
+
maximum=100,
|
| 295 |
+
step=1,
|
| 296 |
+
value=8,
|
| 297 |
+
label="Number of Inference Steps per Image",
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
with gr.Row():
|
| 301 |
+
seed_circular = gr.Slider(
|
| 302 |
+
minimum=0, maximum=100, step=1, value=14, label="Seed"
|
| 303 |
+
)
|
| 304 |
+
seed_vis_circular = gr.Plot(
|
| 305 |
+
value=generate_seed_vis(14), label="Seed"
|
| 306 |
+
)
|
| 307 |
+
|
| 308 |
+
generate_images_button_circular = gr.Button("Generate Images")
|
| 309 |
+
|
| 310 |
+
with gr.Column():
|
| 311 |
+
images_output_circular = gr.Gallery(label="Image", selected_index=0)
|
| 312 |
+
gif_circular = gr.Image(label="GIF")
|
| 313 |
+
zip_output_circular = gr.File(label="Download ZIP")
|
| 314 |
+
|
| 315 |
+
num_images_circular.change(
|
| 316 |
+
fn=calculate_step_size,
|
| 317 |
+
inputs=[num_images_circular, degree_circular],
|
| 318 |
+
outputs=[step_size_circular],
|
| 319 |
+
)
|
| 320 |
+
degree_circular.change(
|
| 321 |
+
fn=calculate_step_size,
|
| 322 |
+
inputs=[num_images_circular, degree_circular],
|
| 323 |
+
outputs=[step_size_circular],
|
| 324 |
+
)
|
| 325 |
+
generate_images_button_circular.click(
|
| 326 |
+
fn=display_circular_images,
|
| 327 |
+
inputs=[
|
| 328 |
+
prompt_circular,
|
| 329 |
+
seed_circular,
|
| 330 |
+
num_inference_steps_circular,
|
| 331 |
+
num_images_circular,
|
| 332 |
+
degree_circular,
|
| 333 |
+
],
|
| 334 |
+
outputs=[images_output_circular, gif_circular, zip_output_circular],
|
| 335 |
+
)
|
| 336 |
+
seed_circular.change(
|
| 337 |
+
fn=generate_seed_vis, inputs=[seed_circular], outputs=[seed_vis_circular]
|
| 338 |
+
)
|
| 339 |
+
|
| 340 |
+
with gr.TabItem("Poke"):
|
| 341 |
+
gr.Markdown("Perturb a region in the image and observe the effect.")
|
| 342 |
+
gr.HTML(read_html("DiffusionDemo/html/poke.html"))
|
| 343 |
+
|
| 344 |
+
with gr.Row():
|
| 345 |
+
with gr.Column():
|
| 346 |
+
prompt_poke = gr.Textbox(
|
| 347 |
+
lines=1,
|
| 348 |
+
label="Prompt",
|
| 349 |
+
value="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
|
| 350 |
+
)
|
| 351 |
+
num_inference_steps_poke = gr.Slider(
|
| 352 |
+
minimum=2,
|
| 353 |
+
maximum=100,
|
| 354 |
+
step=1,
|
| 355 |
+
value=8,
|
| 356 |
+
label="Number of Inference Steps per Image",
|
| 357 |
+
)
|
| 358 |
+
|
| 359 |
+
with gr.Row():
|
| 360 |
+
seed_poke = gr.Slider(
|
| 361 |
+
minimum=0, maximum=100, step=1, value=14, label="Seed"
|
| 362 |
+
)
|
| 363 |
+
seed_vis_poke = gr.Plot(
|
| 364 |
+
value=generate_seed_vis(14), label="Seed"
|
| 365 |
+
)
|
| 366 |
+
|
| 367 |
+
pokeX = gr.Slider(
|
| 368 |
+
label="pokeX",
|
| 369 |
+
minimum=0,
|
| 370 |
+
maximum=64,
|
| 371 |
+
step=1,
|
| 372 |
+
value=32,
|
| 373 |
+
info="X coordinate of poke center",
|
| 374 |
+
)
|
| 375 |
+
pokeY = gr.Slider(
|
| 376 |
+
label="pokeY",
|
| 377 |
+
minimum=0,
|
| 378 |
+
maximum=64,
|
| 379 |
+
step=1,
|
| 380 |
+
value=32,
|
| 381 |
+
info="Y coordinate of poke center",
|
| 382 |
+
)
|
| 383 |
+
pokeHeight = gr.Slider(
|
| 384 |
+
label="pokeHeight",
|
| 385 |
+
minimum=0,
|
| 386 |
+
maximum=64,
|
| 387 |
+
step=1,
|
| 388 |
+
value=8,
|
| 389 |
+
info="Height of the poke",
|
| 390 |
+
)
|
| 391 |
+
pokeWidth = gr.Slider(
|
| 392 |
+
label="pokeWidth",
|
| 393 |
+
minimum=0,
|
| 394 |
+
maximum=64,
|
| 395 |
+
step=1,
|
| 396 |
+
value=8,
|
| 397 |
+
info="Width of the poke",
|
| 398 |
+
)
|
| 399 |
+
|
| 400 |
+
generate_images_button_poke = gr.Button("Generate Images")
|
| 401 |
+
|
| 402 |
+
with gr.Column():
|
| 403 |
+
original_images_output_poke = gr.Image(
|
| 404 |
+
value=visualize_poke(32, 32, 8, 8)[0], label="Original Image"
|
| 405 |
+
)
|
| 406 |
+
poked_images_output_poke = gr.Image(
|
| 407 |
+
value=visualize_poke(32, 32, 8, 8)[1], label="Poked Image"
|
| 408 |
+
)
|
| 409 |
+
zip_output_poke = gr.File(label="Download ZIP")
|
| 410 |
+
|
| 411 |
+
pokeX.change(
|
| 412 |
+
visualize_poke,
|
| 413 |
+
inputs=[pokeX, pokeY, pokeHeight, pokeWidth],
|
| 414 |
+
outputs=[original_images_output_poke, poked_images_output_poke],
|
| 415 |
+
)
|
| 416 |
+
pokeY.change(
|
| 417 |
+
visualize_poke,
|
| 418 |
+
inputs=[pokeX, pokeY, pokeHeight, pokeWidth],
|
| 419 |
+
outputs=[original_images_output_poke, poked_images_output_poke],
|
| 420 |
+
)
|
| 421 |
+
pokeHeight.change(
|
| 422 |
+
visualize_poke,
|
| 423 |
+
inputs=[pokeX, pokeY, pokeHeight, pokeWidth],
|
| 424 |
+
outputs=[original_images_output_poke, poked_images_output_poke],
|
| 425 |
+
)
|
| 426 |
+
pokeWidth.change(
|
| 427 |
+
visualize_poke,
|
| 428 |
+
inputs=[pokeX, pokeY, pokeHeight, pokeWidth],
|
| 429 |
+
outputs=[original_images_output_poke, poked_images_output_poke],
|
| 430 |
+
)
|
| 431 |
+
seed_poke.change(
|
| 432 |
+
fn=generate_seed_vis, inputs=[seed_poke], outputs=[seed_vis_poke]
|
| 433 |
+
)
|
| 434 |
+
|
| 435 |
+
@generate_images_button_poke.click(
|
| 436 |
+
inputs=[
|
| 437 |
+
prompt_poke,
|
| 438 |
+
seed_poke,
|
| 439 |
+
num_inference_steps_poke,
|
| 440 |
+
pokeX,
|
| 441 |
+
pokeY,
|
| 442 |
+
pokeHeight,
|
| 443 |
+
pokeWidth,
|
| 444 |
+
],
|
| 445 |
+
outputs=[
|
| 446 |
+
original_images_output_poke,
|
| 447 |
+
poked_images_output_poke,
|
| 448 |
+
zip_output_poke,
|
| 449 |
+
],
|
| 450 |
+
)
|
| 451 |
+
def generate_images_wrapper(
|
| 452 |
+
prompt,
|
| 453 |
+
seed,
|
| 454 |
+
num_inference_steps,
|
| 455 |
+
pokeX=pokeX,
|
| 456 |
+
pokeY=pokeY,
|
| 457 |
+
pokeHeight=pokeHeight,
|
| 458 |
+
pokeWidth=pokeWidth,
|
| 459 |
+
):
|
| 460 |
+
_, _ = display_poke_images(
|
| 461 |
+
prompt,
|
| 462 |
+
seed,
|
| 463 |
+
num_inference_steps,
|
| 464 |
+
poke=True,
|
| 465 |
+
pokeX=pokeX,
|
| 466 |
+
pokeY=pokeY,
|
| 467 |
+
pokeHeight=pokeHeight,
|
| 468 |
+
pokeWidth=pokeWidth,
|
| 469 |
+
intermediate=False,
|
| 470 |
+
)
|
| 471 |
+
images, modImages = visualize_poke(pokeX, pokeY, pokeHeight, pokeWidth)
|
| 472 |
+
fname = "poke"
|
| 473 |
+
tab_config = {
|
| 474 |
+
"Tab": "Poke",
|
| 475 |
+
"Prompt": prompt,
|
| 476 |
+
"Number of Inference Steps per Image": num_inference_steps,
|
| 477 |
+
"Seed": seed,
|
| 478 |
+
"PokeX": pokeX,
|
| 479 |
+
"PokeY": pokeY,
|
| 480 |
+
"PokeHeight": pokeHeight,
|
| 481 |
+
"PokeWidth": pokeWidth,
|
| 482 |
+
}
|
| 483 |
+
imgs_list = []
|
| 484 |
+
imgs_list.append((images, "Original Image"))
|
| 485 |
+
imgs_list.append((modImages, "Poked Image"))
|
| 486 |
+
|
| 487 |
+
export_as_zip(imgs_list, fname, tab_config)
|
| 488 |
+
return images, modImages, f"outputs/{fname}.zip"
|
| 489 |
+
|
| 490 |
+
with gr.TabItem("Guidance"):
|
| 491 |
+
gr.Markdown("Observe the effect of different guidance scales.")
|
| 492 |
+
gr.HTML(read_html("DiffusionDemo/html/guidance.html"))
|
| 493 |
+
|
| 494 |
+
with gr.Row():
|
| 495 |
+
with gr.Column():
|
| 496 |
+
prompt_guidance = gr.Textbox(
|
| 497 |
+
lines=1,
|
| 498 |
+
label="Prompt",
|
| 499 |
+
value="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
|
| 500 |
+
)
|
| 501 |
+
num_inference_steps_guidance = gr.Slider(
|
| 502 |
+
minimum=2,
|
| 503 |
+
maximum=100,
|
| 504 |
+
step=1,
|
| 505 |
+
value=8,
|
| 506 |
+
label="Number of Inference Steps per Image",
|
| 507 |
+
)
|
| 508 |
+
guidance_scale_values = gr.Textbox(
|
| 509 |
+
lines=1, value="1, 8, 20, 30", label="Guidance Scale Values"
|
| 510 |
+
)
|
| 511 |
+
|
| 512 |
+
with gr.Row():
|
| 513 |
+
seed_guidance = gr.Slider(
|
| 514 |
+
minimum=0, maximum=100, step=1, value=14, label="Seed"
|
| 515 |
+
)
|
| 516 |
+
seed_vis_guidance = gr.Plot(
|
| 517 |
+
value=generate_seed_vis(14), label="Seed"
|
| 518 |
+
)
|
| 519 |
+
|
| 520 |
+
generate_images_button_guidance = gr.Button("Generate Images")
|
| 521 |
+
|
| 522 |
+
with gr.Column():
|
| 523 |
+
images_output_guidance = gr.Gallery(
|
| 524 |
+
label="Images", selected_index=0
|
| 525 |
+
)
|
| 526 |
+
zip_output_guidance = gr.File(label="Download ZIP")
|
| 527 |
+
|
| 528 |
+
generate_images_button_guidance.click(
|
| 529 |
+
fn=display_guidance_images,
|
| 530 |
+
inputs=[
|
| 531 |
+
prompt_guidance,
|
| 532 |
+
seed_guidance,
|
| 533 |
+
num_inference_steps_guidance,
|
| 534 |
+
guidance_scale_values,
|
| 535 |
+
],
|
| 536 |
+
outputs=[images_output_guidance, zip_output_guidance],
|
| 537 |
+
)
|
| 538 |
+
seed_guidance.change(
|
| 539 |
+
fn=generate_seed_vis, inputs=[seed_guidance], outputs=[seed_vis_guidance]
|
| 540 |
+
)
|
| 541 |
+
|
| 542 |
+
with gr.TabItem("Inpainting"):
|
| 543 |
+
gr.Markdown("Inpaint the image based on the prompt.")
|
| 544 |
+
gr.HTML(read_html("DiffusionDemo/html/inpainting.html"))
|
| 545 |
+
|
| 546 |
+
with gr.Row():
|
| 547 |
+
with gr.Column():
|
| 548 |
+
uploaded_img_inpaint = gr.Image(
|
| 549 |
+
source="upload", tool="sketch", type="pil", label="Upload"
|
| 550 |
+
)
|
| 551 |
+
prompt_inpaint = gr.Textbox(
|
| 552 |
+
lines=1, label="Prompt", value="sunglasses"
|
| 553 |
+
)
|
| 554 |
+
num_inference_steps_inpaint = gr.Slider(
|
| 555 |
+
minimum=2,
|
| 556 |
+
maximum=100,
|
| 557 |
+
step=1,
|
| 558 |
+
value=8,
|
| 559 |
+
label="Number of Inference Steps per Image",
|
| 560 |
+
)
|
| 561 |
+
|
| 562 |
+
with gr.Row():
|
| 563 |
+
seed_inpaint = gr.Slider(
|
| 564 |
+
minimum=0, maximum=100, step=1, value=14, label="Seed"
|
| 565 |
+
)
|
| 566 |
+
seed_vis_inpaint = gr.Plot(
|
| 567 |
+
value=generate_seed_vis(14), label="Seed"
|
| 568 |
+
)
|
| 569 |
+
|
| 570 |
+
inpaint_button = gr.Button("Inpaint")
|
| 571 |
+
|
| 572 |
+
with gr.Column():
|
| 573 |
+
images_output_inpaint = gr.Image(label="Output")
|
| 574 |
+
zip_output_inpaint = gr.File(label="Download ZIP")
|
| 575 |
+
|
| 576 |
+
inpaint_button.click(
|
| 577 |
+
fn=inpaint,
|
| 578 |
+
inputs=[
|
| 579 |
+
uploaded_img_inpaint,
|
| 580 |
+
num_inference_steps_inpaint,
|
| 581 |
+
seed_inpaint,
|
| 582 |
+
prompt_inpaint,
|
| 583 |
+
],
|
| 584 |
+
outputs=[images_output_inpaint, zip_output_inpaint],
|
| 585 |
+
)
|
| 586 |
+
seed_inpaint.change(
|
| 587 |
+
fn=generate_seed_vis, inputs=[seed_inpaint], outputs=[seed_vis_inpaint]
|
| 588 |
+
)
|
| 589 |
+
|
| 590 |
+
with gr.Tab("CLIP Space"):
|
| 591 |
+
|
| 592 |
+
with gr.TabItem("Embeddings"):
|
| 593 |
+
gr.Markdown(
|
| 594 |
+
"Visualize text embedding space in 3D with input texts and output images based on the chosen axis."
|
| 595 |
+
)
|
| 596 |
+
gr.HTML(read_html("DiffusionDemo/html/embeddings.html"))
|
| 597 |
+
|
| 598 |
+
with gr.Row():
|
| 599 |
+
output = gr.HTML(
|
| 600 |
+
f"""
|
| 601 |
+
<iframe id="html" src="{dash_tunnel}" style="width:100%; height:700px;"></iframe>
|
| 602 |
+
"""
|
| 603 |
+
)
|
| 604 |
+
with gr.Row():
|
| 605 |
+
word2add_rem = gr.Textbox(lines=1, label="Add/Remove word")
|
| 606 |
+
word2change = gr.Textbox(lines=1, label="Change image for word")
|
| 607 |
+
clear_words_button = gr.Button(value="Clear words")
|
| 608 |
+
|
| 609 |
+
with gr.Accordion("Custom Semantic Dimensions", open=False):
|
| 610 |
+
with gr.Row():
|
| 611 |
+
axis_name_1 = gr.Textbox(label="Axis name", value="gender")
|
| 612 |
+
which_axis_1 = gr.Dropdown(
|
| 613 |
+
choices=["X - Axis", "Y - Axis", "Z - Axis", "---"],
|
| 614 |
+
value=whichAxisMap["which_axis_1"],
|
| 615 |
+
label="Axis direction",
|
| 616 |
+
)
|
| 617 |
+
from_words_1 = gr.Textbox(
|
| 618 |
+
lines=1,
|
| 619 |
+
label="Positive",
|
| 620 |
+
value="prince husband father son uncle",
|
| 621 |
+
)
|
| 622 |
+
to_words_1 = gr.Textbox(
|
| 623 |
+
lines=1,
|
| 624 |
+
label="Negative",
|
| 625 |
+
value="princess wife mother daughter aunt",
|
| 626 |
+
)
|
| 627 |
+
submit_1 = gr.Button("Submit")
|
| 628 |
+
|
| 629 |
+
with gr.Row():
|
| 630 |
+
axis_name_2 = gr.Textbox(label="Axis name", value="age")
|
| 631 |
+
which_axis_2 = gr.Dropdown(
|
| 632 |
+
choices=["X - Axis", "Y - Axis", "Z - Axis", "---"],
|
| 633 |
+
value=whichAxisMap["which_axis_2"],
|
| 634 |
+
label="Axis direction",
|
| 635 |
+
)
|
| 636 |
+
from_words_2 = gr.Textbox(
|
| 637 |
+
lines=1, label="Positive", value="man woman king queen father"
|
| 638 |
+
)
|
| 639 |
+
to_words_2 = gr.Textbox(
|
| 640 |
+
lines=1, label="Negative", value="boy girl prince princess son"
|
| 641 |
+
)
|
| 642 |
+
submit_2 = gr.Button("Submit")
|
| 643 |
+
|
| 644 |
+
with gr.Row():
|
| 645 |
+
axis_name_3 = gr.Textbox(label="Axis name", value="residual")
|
| 646 |
+
which_axis_3 = gr.Dropdown(
|
| 647 |
+
choices=["X - Axis", "Y - Axis", "Z - Axis", "---"],
|
| 648 |
+
value=whichAxisMap["which_axis_3"],
|
| 649 |
+
label="Axis direction",
|
| 650 |
+
)
|
| 651 |
+
from_words_3 = gr.Textbox(lines=1, label="Positive")
|
| 652 |
+
to_words_3 = gr.Textbox(lines=1, label="Negative")
|
| 653 |
+
submit_3 = gr.Button("Submit")
|
| 654 |
+
|
| 655 |
+
with gr.Row():
|
| 656 |
+
axis_name_4 = gr.Textbox(label="Axis name", value="number")
|
| 657 |
+
which_axis_4 = gr.Dropdown(
|
| 658 |
+
choices=["X - Axis", "Y - Axis", "Z - Axis", "---"],
|
| 659 |
+
value=whichAxisMap["which_axis_4"],
|
| 660 |
+
label="Axis direction",
|
| 661 |
+
)
|
| 662 |
+
from_words_4 = gr.Textbox(
|
| 663 |
+
lines=1,
|
| 664 |
+
label="Positive",
|
| 665 |
+
value="boys girls cats puppies computers",
|
| 666 |
+
)
|
| 667 |
+
to_words_4 = gr.Textbox(
|
| 668 |
+
lines=1, label="Negative", value="boy girl cat puppy computer"
|
| 669 |
+
)
|
| 670 |
+
submit_4 = gr.Button("Submit")
|
| 671 |
+
|
| 672 |
+
with gr.Row():
|
| 673 |
+
axis_name_5 = gr.Textbox(label="Axis name", value="royalty")
|
| 674 |
+
which_axis_5 = gr.Dropdown(
|
| 675 |
+
choices=["X - Axis", "Y - Axis", "Z - Axis", "---"],
|
| 676 |
+
value=whichAxisMap["which_axis_5"],
|
| 677 |
+
label="Axis direction",
|
| 678 |
+
)
|
| 679 |
+
from_words_5 = gr.Textbox(
|
| 680 |
+
lines=1,
|
| 681 |
+
label="Positive",
|
| 682 |
+
value="king queen prince princess duchess",
|
| 683 |
+
)
|
| 684 |
+
to_words_5 = gr.Textbox(
|
| 685 |
+
lines=1, label="Negative", value="man woman boy girl woman"
|
| 686 |
+
)
|
| 687 |
+
submit_5 = gr.Button("Submit")
|
| 688 |
+
|
| 689 |
+
with gr.Row():
|
| 690 |
+
axis_name_6 = gr.Textbox(label="Axis name")
|
| 691 |
+
which_axis_6 = gr.Dropdown(
|
| 692 |
+
choices=["X - Axis", "Y - Axis", "Z - Axis", "---"],
|
| 693 |
+
value=whichAxisMap["which_axis_6"],
|
| 694 |
+
label="Axis direction",
|
| 695 |
+
)
|
| 696 |
+
from_words_6 = gr.Textbox(lines=1, label="Positive")
|
| 697 |
+
to_words_6 = gr.Textbox(lines=1, label="Negative")
|
| 698 |
+
submit_6 = gr.Button("Submit")
|
| 699 |
+
|
| 700 |
+
@word2add_rem.submit(inputs=[word2add_rem], outputs=[output, word2add_rem])
|
| 701 |
+
def add_rem_word_and_clear(words):
|
| 702 |
+
return add_rem_word(words), ""
|
| 703 |
+
|
| 704 |
+
@word2change.submit(inputs=[word2change], outputs=[output, word2change])
|
| 705 |
+
def change_word_and_clear(word):
|
| 706 |
+
return change_word(word), ""
|
| 707 |
+
|
| 708 |
+
clear_words_button.click(fn=clear_words, outputs=[output])
|
| 709 |
+
|
| 710 |
+
@submit_1.click(
|
| 711 |
+
inputs=[axis_name_1, which_axis_1, from_words_1, to_words_1],
|
| 712 |
+
outputs=[
|
| 713 |
+
output,
|
| 714 |
+
which_axis_2,
|
| 715 |
+
which_axis_3,
|
| 716 |
+
which_axis_4,
|
| 717 |
+
which_axis_5,
|
| 718 |
+
which_axis_6,
|
| 719 |
+
],
|
| 720 |
+
)
|
| 721 |
+
def set_axis_wrapper(axis_name, which_axis, from_words, to_words):
|
| 722 |
+
|
| 723 |
+
for ax in whichAxisMap:
|
| 724 |
+
if whichAxisMap[ax] == which_axis:
|
| 725 |
+
whichAxisMap[ax] = "---"
|
| 726 |
+
|
| 727 |
+
whichAxisMap["which_axis_1"] = which_axis
|
| 728 |
+
return (
|
| 729 |
+
set_axis(axis_name, which_axis, from_words, to_words),
|
| 730 |
+
whichAxisMap["which_axis_2"],
|
| 731 |
+
whichAxisMap["which_axis_3"],
|
| 732 |
+
whichAxisMap["which_axis_4"],
|
| 733 |
+
whichAxisMap["which_axis_5"],
|
| 734 |
+
whichAxisMap["which_axis_6"],
|
| 735 |
+
)
|
| 736 |
+
|
| 737 |
+
@submit_2.click(
|
| 738 |
+
inputs=[axis_name_2, which_axis_2, from_words_2, to_words_2],
|
| 739 |
+
outputs=[
|
| 740 |
+
output,
|
| 741 |
+
which_axis_1,
|
| 742 |
+
which_axis_3,
|
| 743 |
+
which_axis_4,
|
| 744 |
+
which_axis_5,
|
| 745 |
+
which_axis_6,
|
| 746 |
+
],
|
| 747 |
+
)
|
| 748 |
+
def set_axis_wrapper(axis_name, which_axis, from_words, to_words):
|
| 749 |
+
|
| 750 |
+
for ax in whichAxisMap:
|
| 751 |
+
if whichAxisMap[ax] == which_axis:
|
| 752 |
+
whichAxisMap[ax] = "---"
|
| 753 |
+
|
| 754 |
+
whichAxisMap["which_axis_2"] = which_axis
|
| 755 |
+
return (
|
| 756 |
+
set_axis(axis_name, which_axis, from_words, to_words),
|
| 757 |
+
whichAxisMap["which_axis_1"],
|
| 758 |
+
whichAxisMap["which_axis_3"],
|
| 759 |
+
whichAxisMap["which_axis_4"],
|
| 760 |
+
whichAxisMap["which_axis_5"],
|
| 761 |
+
whichAxisMap["which_axis_6"],
|
| 762 |
+
)
|
| 763 |
+
|
| 764 |
+
@submit_3.click(
|
| 765 |
+
inputs=[axis_name_3, which_axis_3, from_words_3, to_words_3],
|
| 766 |
+
outputs=[
|
| 767 |
+
output,
|
| 768 |
+
which_axis_1,
|
| 769 |
+
which_axis_2,
|
| 770 |
+
which_axis_4,
|
| 771 |
+
which_axis_5,
|
| 772 |
+
which_axis_6,
|
| 773 |
+
],
|
| 774 |
+
)
|
| 775 |
+
def set_axis_wrapper(axis_name, which_axis, from_words, to_words):
|
| 776 |
+
|
| 777 |
+
for ax in whichAxisMap:
|
| 778 |
+
if whichAxisMap[ax] == which_axis:
|
| 779 |
+
whichAxisMap[ax] = "---"
|
| 780 |
+
|
| 781 |
+
whichAxisMap["which_axis_3"] = which_axis
|
| 782 |
+
return (
|
| 783 |
+
set_axis(axis_name, which_axis, from_words, to_words),
|
| 784 |
+
whichAxisMap["which_axis_1"],
|
| 785 |
+
whichAxisMap["which_axis_2"],
|
| 786 |
+
whichAxisMap["which_axis_4"],
|
| 787 |
+
whichAxisMap["which_axis_5"],
|
| 788 |
+
whichAxisMap["which_axis_6"],
|
| 789 |
+
)
|
| 790 |
+
|
| 791 |
+
@submit_4.click(
|
| 792 |
+
inputs=[axis_name_4, which_axis_4, from_words_4, to_words_4],
|
| 793 |
+
outputs=[
|
| 794 |
+
output,
|
| 795 |
+
which_axis_1,
|
| 796 |
+
which_axis_2,
|
| 797 |
+
which_axis_3,
|
| 798 |
+
which_axis_5,
|
| 799 |
+
which_axis_6,
|
| 800 |
+
],
|
| 801 |
+
)
|
| 802 |
+
def set_axis_wrapper(axis_name, which_axis, from_words, to_words):
|
| 803 |
+
|
| 804 |
+
for ax in whichAxisMap:
|
| 805 |
+
if whichAxisMap[ax] == which_axis:
|
| 806 |
+
whichAxisMap[ax] = "---"
|
| 807 |
+
|
| 808 |
+
whichAxisMap["which_axis_4"] = which_axis
|
| 809 |
+
return (
|
| 810 |
+
set_axis(axis_name, which_axis, from_words, to_words),
|
| 811 |
+
whichAxisMap["which_axis_1"],
|
| 812 |
+
whichAxisMap["which_axis_2"],
|
| 813 |
+
whichAxisMap["which_axis_3"],
|
| 814 |
+
whichAxisMap["which_axis_5"],
|
| 815 |
+
whichAxisMap["which_axis_6"],
|
| 816 |
+
)
|
| 817 |
+
|
| 818 |
+
@submit_5.click(
|
| 819 |
+
inputs=[axis_name_5, which_axis_5, from_words_5, to_words_5],
|
| 820 |
+
outputs=[
|
| 821 |
+
output,
|
| 822 |
+
which_axis_1,
|
| 823 |
+
                which_axis_2,
                which_axis_3,
                which_axis_4,
                which_axis_6,
            ],
        )
        def set_axis_wrapper(axis_name, which_axis, from_words, to_words):
            for ax in whichAxisMap:
                if whichAxisMap[ax] == which_axis:
                    whichAxisMap[ax] = "---"

            whichAxisMap["which_axis_5"] = which_axis
            return (
                set_axis(axis_name, which_axis, from_words, to_words),
                whichAxisMap["which_axis_1"],
                whichAxisMap["which_axis_2"],
                whichAxisMap["which_axis_3"],
                whichAxisMap["which_axis_4"],
                whichAxisMap["which_axis_6"],
            )

        @submit_6.click(
            inputs=[axis_name_6, which_axis_6, from_words_6, to_words_6],
            outputs=[
                output,
                which_axis_1,
                which_axis_2,
                which_axis_3,
                which_axis_4,
                which_axis_5,
            ],
        )
        def set_axis_wrapper(axis_name, which_axis, from_words, to_words):
            for ax in whichAxisMap:
                if whichAxisMap[ax] == which_axis:
                    whichAxisMap[ax] = "---"

            whichAxisMap["which_axis_6"] = which_axis
            return (
                set_axis(axis_name, which_axis, from_words, to_words),
                whichAxisMap["which_axis_1"],
                whichAxisMap["which_axis_2"],
                whichAxisMap["which_axis_3"],
                whichAxisMap["which_axis_4"],
                whichAxisMap["which_axis_5"],
            )

        with gr.TabItem("Interpolate"):
            gr.Markdown(
                "Interpolate between the first and the second prompt, and observe how the output changes."
            )
            gr.HTML(read_html("DiffusionDemo/html/interpolate.html"))

            with gr.Row():
                with gr.Column():
                    promptA = gr.Textbox(
                        lines=1,
                        label="First Prompt",
                        value="Self-portrait oil painting, a beautiful man with golden hair, 8k",
                    )
                    promptB = gr.Textbox(
                        lines=1,
                        label="Second Prompt",
                        value="Self-portrait oil painting, a beautiful woman with golden hair, 8k",
                    )
                    num_images_interpolate = gr.Slider(
                        minimum=0,
                        maximum=100,
                        step=1,
                        value=5,
                        label="Number of Interpolation Steps",
                    )
                    num_inference_steps_interpolate = gr.Slider(
                        minimum=2,
                        maximum=100,
                        step=1,
                        value=8,
                        label="Number of Inference Steps per Image",
                    )

                    with gr.Row():
                        seed_interpolate = gr.Slider(
                            minimum=0, maximum=100, step=1, value=14, label="Seed"
                        )
                        seed_vis_interpolate = gr.Plot(
                            value=generate_seed_vis(14), label="Seed"
                        )

                    generate_images_button_interpolate = gr.Button("Generate Images")

                with gr.Column():
                    images_output_interpolate = gr.Gallery(
                        label="Interpolated Images", selected_index=0
                    )
                    gif_interpolate = gr.Image(label="GIF")
                    zip_output_interpolate = gr.File(label="Download ZIP")

            generate_images_button_interpolate.click(
                fn=display_interpolate_images,
                inputs=[
                    seed_interpolate,
                    promptA,
                    promptB,
                    num_inference_steps_interpolate,
                    num_images_interpolate,
                ],
                outputs=[
                    images_output_interpolate,
                    gif_interpolate,
                    zip_output_interpolate,
                ],
            )
            seed_interpolate.change(
                fn=generate_seed_vis,
                inputs=[seed_interpolate],
                outputs=[seed_vis_interpolate],
            )

        with gr.TabItem("Negative"):
            gr.Markdown("Observe the effect of negative prompts.")
            gr.HTML(read_html("DiffusionDemo/html/negative.html"))

            with gr.Row():
                with gr.Column():
                    prompt_negative = gr.Textbox(
                        lines=1,
                        label="Prompt",
                        value="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
                    )
                    neg_prompt = gr.Textbox(
                        lines=1, label="Negative Prompt", value="Yellow"
                    )
                    num_inference_steps_negative = gr.Slider(
                        minimum=2,
                        maximum=100,
                        step=1,
                        value=8,
                        label="Number of Inference Steps per Image",
                    )

                    with gr.Row():
                        seed_negative = gr.Slider(
                            minimum=0, maximum=100, step=1, value=14, label="Seed"
                        )
                        seed_vis_negative = gr.Plot(
                            value=generate_seed_vis(14), label="Seed"
                        )

                    generate_images_button_negative = gr.Button("Generate Images")

                with gr.Column():
                    images_output_negative = gr.Image(
                        label="Image without Negative Prompt"
                    )
                    images_neg_output_negative = gr.Image(
                        label="Image with Negative Prompt"
                    )
                    zip_output_negative = gr.File(label="Download ZIP")

            seed_negative.change(
                fn=generate_seed_vis, inputs=[seed_negative], outputs=[seed_vis_negative]
            )
            generate_images_button_negative.click(
                fn=display_negative_images,
                inputs=[
                    prompt_negative,
                    seed_negative,
                    num_inference_steps_negative,
                    neg_prompt,
                ],
                outputs=[
                    images_output_negative,
                    images_neg_output_negative,
                    zip_output_negative,
                ],
            )

    with gr.Tab("Credits"):
        gr.Markdown("""
        Author: Adithya Kameswara Rao, Carnegie Mellon University.

        Advisor: David S. Touretzky, Carnegie Mellon University.

        This work was funded by a grant from NEOM Company, and by National Science Foundation award IIS-2112633.
        """)


def run_dash():
    app.run(host="127.0.0.1", port="8000")


def run_gradio():
    demo.queue()
    _, _, public_url = demo.launch(share=True)
    return public_url


# if __name__ == "__main__":
#     thread = Thread(target=run_dash)
#     thread.daemon = True
#     thread.start()
#     try:
#         run_gradio()
#     except KeyboardInterrupt:
#         print("Server closed")
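The commented-out block above is the intended entry point for run.py: Dash serves the embedding plot on port 8000 in a background daemon thread while Gradio hosts the UI. A minimal launch sketch under that assumption (it presumes `Thread` is imported from `threading` and simply uncomments the logic already shown):

from threading import Thread

if __name__ == "__main__":
    thread = Thread(target=run_dash, daemon=True)  # Dash embedding plot on port 8000
    thread.start()
    try:
        run_gradio()                               # Gradio UI blocks until interrupted
    except KeyboardInterrupt:
        print("Server closed")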
src/__init__.py
ADDED
@@ -0,0 +1,2 @@
from . import util
from . import pipelines
src/pipelines/__init__.py
ADDED
@@ -0,0 +1,9 @@
from .circular import *
from .embeddings import *
from .interpolate import *
from .poke import *
from .seed import *
from .perturbations import *
from .negative import *
from .guidance import *
from .inpainting import *
src/pipelines/circular.py
ADDED
@@ -0,0 +1,52 @@
import torch
import numpy as np
import gradio as gr
from src.util.base import *
from src.util.params import *


def display_circular_images(
    prompt, seed, num_inference_steps, num_images, degree, progress=gr.Progress()
):
    np.random.seed(seed)
    text_embeddings = get_text_embeddings(prompt)

    latents_x = generate_latents(seed)
    latents_y = generate_latents(seed * np.random.randint(0, 100000))

    scale_x = torch.cos(
        torch.linspace(0, 2, num_images) * torch.pi * (degree / 360)
    ).to(torch_device)
    scale_y = torch.sin(
        torch.linspace(0, 2, num_images) * torch.pi * (degree / 360)
    ).to(torch_device)

    noise_x = torch.tensordot(scale_x, latents_x, dims=0)
    noise_y = torch.tensordot(scale_y, latents_y, dims=0)

    noise = noise_x + noise_y

    progress(0)
    images = []
    for i in range(num_images):
        progress(i / num_images)
        image = generate_images(noise[i], text_embeddings, num_inference_steps)
        images.append((image, "{}".format(i)))

    progress(1, desc="Exporting as gif")
    export_as_gif(images, filename="circular.gif")

    fname = "circular"
    tab_config = {
        "Tab": "Circular",
        "Prompt": prompt,
        "Number of Steps around the Circle": num_images,
        "Proportion of Circle": degree,
        "Number of Inference Steps per Image": num_inference_steps,
        "Seed": seed,
    }
    export_as_zip(images, fname, tab_config)
    return images, "outputs/circular.gif", f"outputs/{fname}.zip"


__all__ = ["display_circular_images"]
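The trajectory in circular.py closes on itself because step i uses the latent cos(theta_i) * latents_x + sin(theta_i) * latents_y, with theta sweeping from 0 to 2*pi*(degree/360). A quick standalone check (not part of the pipeline) that the endpoints coincide when degree is 360:

import torch

num_images, degree = 5, 360
theta = torch.linspace(0, 2, num_images) * torch.pi * (degree / 360)
# cos starts and ends at 1, sin starts and ends at 0, so the last latent equals the first.
print(torch.cos(theta))  # approximately [1, 0, -1, 0, 1]
print(torch.sin(theta))  # approximately [0, 1, 0, -1, 0]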
src/pipelines/embeddings.py
ADDED
@@ -0,0 +1,196 @@
import random
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from diffusers import StableDiffusionPipeline

import base64
from io import BytesIO
import plotly.express as px

from src.util.base import *
from src.util.params import *
from src.util.clip_config import *

age = get_axis_embeddings(young, old)
gender = get_axis_embeddings(masculine, feminine)
royalty = get_axis_embeddings(common, elite)

images = []
for example in examples:
    image = pipe(
        prompt=example,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).images[0]
    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
    images.append("data:image/jpeg;base64, " + encoded_image)

axis = np.vstack([gender, royalty, age])
axis[1] = calculate_residual(axis, axis_names)

coords = get_concat_embeddings(examples) @ axis.T
coords[:, 1] = 5 * (1.0 - coords[:, 1])


def update_fig():
    global coords, examples, fig
    fig.data[0].x = coords[:, 0]
    fig.data[0].y = coords[:, 1]
    fig.data[0].z = coords[:, 2]
    fig.data[0].text = examples

    return f"""
    <script>
        document.getElementById("html").src += "?rand={random.random()}"
    </script>
    <iframe id="html" src={dash_tunnel} style="width:100%; height:725px;"></iframe>
    """


def add_word(new_example):
    global coords, images, examples
    new_coord = get_concat_embeddings([new_example]) @ axis.T
    new_coord[:, 1] = 5 * (1.0 - new_coord[:, 1])
    coords = np.vstack([coords, new_coord])

    image = pipe(
        prompt=new_example,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).images[0]
    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
    images.append("data:image/jpeg;base64, " + encoded_image)
    examples.append(new_example)
    return update_fig()


def remove_word(new_example):
    global coords, images, examples
    examplesMap = {example: index for index, example in enumerate(examples)}
    index = examplesMap[new_example]

    coords = np.delete(coords, index, 0)
    images.pop(index)
    examples.pop(index)
    return update_fig()


def add_rem_word(new_examples):
    global examples
    new_examples = new_examples.replace(",", " ").split()

    for new_example in new_examples:
        if new_example in examples:
            remove_word(new_example)
            gr.Info("Removed {}".format(new_example))
        else:
            tokens = tokenizer.encode(new_example)
            if len(tokens) != 3:
                gr.Warning(f"{new_example} not found in embeddings")
            else:
                add_word(new_example)
                gr.Info("Added {}".format(new_example))

    return update_fig()


def set_axis(axis_name, which_axis, from_words, to_words):
    global coords, examples, fig, axis_names

    if axis_name != "residual":
        from_words, to_words = (
            from_words.replace(",", " ").split(),
            to_words.replace(",", " ").split(),
        )
        axis_emb = get_axis_embeddings(from_words, to_words)
        axis[axisMap[which_axis]] = axis_emb
        axis_names[axisMap[which_axis]] = axis_name

        for i, name in enumerate(axis_names):
            if name == "residual":
                axis[i] = calculate_residual(axis, axis_names, from_words, to_words, i)
                axis_names[i] = "residual"
    else:
        residual = calculate_residual(
            axis, axis_names, residual_axis=axisMap[which_axis]
        )
        axis[axisMap[which_axis]] = residual
        axis_names[axisMap[which_axis]] = axis_name

    coords = get_concat_embeddings(examples) @ axis.T
    coords[:, 1] = 5 * (1.0 - coords[:, 1])

    fig.update_layout(
        scene=dict(
            xaxis_title=axis_names[0],
            yaxis_title=axis_names[1],
            zaxis_title=axis_names[2],
        )
    )
    return update_fig()


def change_word(examples):
    examples = examples.replace(",", " ").split()

    for example in examples:
        remove_word(example)
        add_word(example)
        gr.Info("Changed image for {}".format(example))

    return update_fig()


def clear_words():
    while examples:
        remove_word(examples[-1])
    return update_fig()


def generate_word_emb_vis(prompt):
    buf = BytesIO()
    emb = get_word_embeddings(prompt).reshape(77, 768)[1]
    plt.imsave(buf, [emb], cmap="inferno")
    img = "data:image/jpeg;base64, " + base64.b64encode(buf.getvalue()).decode("utf-8")
    return img


fig = px.scatter_3d(
    x=coords[:, 0],
    y=coords[:, 1],
    z=coords[:, 2],
    labels={
        "x": axis_names[0],
        "y": axis_names[1],
        "z": axis_names[2],
    },
    text=examples,
    height=750,
)

fig.update_layout(
    margin=dict(l=0, r=0, b=0, t=0), scene_camera=dict(eye=dict(x=2, y=2, z=0.1))
)

fig.update_traces(hoverinfo="none", hovertemplate=None)

__all__ = [
    "fig",
    "update_fig",
    "coords",
    "images",
    "examples",
    "add_word",
    "remove_word",
    "add_rem_word",
    "change_word",
    "clear_words",
    "generate_word_emb_vis",
    "set_axis",
    "axis",
]
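The projection math in embeddings.py is compact, so here is a small self-contained sketch of the same idea with made-up vectors (the real code uses CLIP embeddings from get_word_embeddings): an axis is the average difference of paired word embeddings, and a word's coordinate along that axis is a dot product.

import numpy as np

np.random.seed(0)
young_vecs = np.random.rand(3, 8)  # stand-ins for embeddings of three "young" words
old_vecs = np.random.rand(3, 8)    # stand-ins for the paired "old" words
age_axis = np.average(young_vecs - old_vecs, axis=0).reshape(1, -1)  # same recipe as get_axis_embeddings

word = np.random.rand(1, 8)        # stand-in for one example word embedding
coord = word @ age_axis.T          # its coordinate along the "age" axis, as in coords = embeddings @ axis.T
print(coord.item())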
src/pipelines/guidance.py
ADDED
@@ -0,0 +1,39 @@
import gradio as gr
from src.util.base import *
from src.util.params import *


def display_guidance_images(
    prompt, seed, num_inference_steps, guidance_values, progress=gr.Progress()
):
    text_embeddings = get_text_embeddings(prompt)
    latents = generate_latents(seed)

    progress(0)
    images = []
    guidance_values = guidance_values.replace(",", " ").split()
    num_images = len(guidance_values)

    for i in range(num_images):
        progress(i / num_images)
        image = generate_images(
            latents,
            text_embeddings,
            num_inference_steps,
            guidance_scale=int(guidance_values[i]),
        )
        images.append((image, "{}".format(int(guidance_values[i]))))

    fname = "guidance"
    tab_config = {
        "Tab": "Guidance",
        "Prompt": prompt,
        "Guidance Scale Values": guidance_values,
        "Number of Inference Steps per Image": num_inference_steps,
        "Seed": seed,
    }
    export_as_zip(images, fname, tab_config)
    return images, f"outputs/{fname}.zip"


__all__ = ["display_guidance_images"]
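For reference, a hypothetical call outside the Gradio UI, reusing the defaults exported by src.util.params; the guidance values are passed as a comma-separated string, exactly as the textbox supplies them:

from src.util.params import prompt, seed, num_inference_steps

gallery, zip_path = display_guidance_images(prompt, seed, num_inference_steps, "1, 8, 20")
# gallery is a list of (PIL.Image, label) pairs, one per guidance value; zip_path is "outputs/guidance.zip"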
src/pipelines/inpainting.py
ADDED
@@ -0,0 +1,41 @@
import torch
import gradio as gr
from src.util.base import *
from src.util.params import *
from diffusers import AutoPipelineForInpainting

# inpaint_pipe = AutoPipelineForInpainting.from_pretrained(inpaint_model_path).to(torch_device)
inpaint_pipe = AutoPipelineForInpainting.from_pipe(pipe).to(torch_device)


def inpaint(dict, num_inference_steps, seed, prompt="", progress=gr.Progress()):
    progress(0)
    mask = dict["mask"].convert("RGB").resize((imageHeight, imageWidth))
    init_image = dict["image"].convert("RGB").resize((imageHeight, imageWidth))
    output = inpaint_pipe(
        prompt=prompt,
        image=init_image,
        mask_image=mask,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=torch.Generator().manual_seed(seed),
    )
    progress(1)

    fname = "inpainting"
    tab_config = {
        "Tab": "Inpainting",
        "Prompt": prompt,
        "Number of Inference Steps per Image": num_inference_steps,
        "Seed": seed,
    }

    imgs_list = []
    imgs_list.append((output.images[0], "Inpainted Image"))
    imgs_list.append((mask, "Mask"))

    export_as_zip(imgs_list, fname, tab_config)
    return output.images[0], f"outputs/{fname}.zip"


__all__ = ["inpaint"]
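A hypothetical call outside the UI; the first argument mimics the payload Gradio's sketch tool produces, a dict holding the base image and the painted mask (the file names below are made up for illustration):

from PIL import Image

payload = {
    "image": Image.open("photo.png"),  # base picture (hypothetical file)
    "mask": Image.open("mask.png"),    # white where the region should be repainted
}
result, zip_path = inpaint(payload, num_inference_steps=8, seed=14, prompt="a red hat")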
src/pipelines/interpolate.py
ADDED
@@ -0,0 +1,51 @@
import torch
import gradio as gr
from src.util.base import *
from src.util.params import *


def interpolate_prompts(promptA, promptB, num_interpolation_steps):
    text_embeddingsA = get_text_embeddings(promptA)
    text_embeddingsB = get_text_embeddings(promptB)

    interpolated_embeddings = []

    for i in range(num_interpolation_steps):
        alpha = i / num_interpolation_steps
        interpolated_embedding = torch.lerp(text_embeddingsA, text_embeddingsB, alpha)
        interpolated_embeddings.append(interpolated_embedding)

    return interpolated_embeddings


def display_interpolate_images(
    seed, promptA, promptB, num_inference_steps, num_images, progress=gr.Progress()
):
    latents = generate_latents(seed)
    num_images = num_images + 2  # add 2 for first and last image
    text_embeddings = interpolate_prompts(promptA, promptB, num_images)
    images = []
    progress(0)

    for i in range(num_images):
        progress(i / num_images)
        image = generate_images(latents, text_embeddings[i], num_inference_steps)
        images.append((image, "{}".format(i + 1)))

    progress(1, desc="Exporting as gif")
    export_as_gif(images, filename="interpolate.gif", reverse=True)

    fname = "interpolate"
    tab_config = {
        "Tab": "Interpolate",
        "First Prompt": promptA,
        "Second Prompt": promptB,
        "Number of Interpolation Steps": num_images,
        "Number of Inference Steps per Image": num_inference_steps,
        "Seed": seed,
    }
    export_as_zip(images, fname, tab_config)
    return images, "outputs/interpolate.gif", f"outputs/{fname}.zip"


__all__ = ["display_interpolate_images"]
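torch.lerp(a, b, w) returns a + w * (b - a), so the loop in interpolate_prompts sweeps the weight through 0, 1/n, ..., (n-1)/n. A tiny standalone check of that behaviour:

import torch

a, b = torch.tensor(0.0), torch.tensor(10.0)
weights = [i / 5 for i in range(5)]
print([torch.lerp(a, b, w).item() for w in weights])  # [0.0, 2.0, 4.0, 6.0, 8.0]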
src/pipelines/negative.py
ADDED
@@ -0,0 +1,37 @@
import gradio as gr
from src.util.base import *
from src.util.params import *


def display_negative_images(
    prompt, seed, num_inference_steps, negative_prompt="", progress=gr.Progress()
):
    text_embeddings = get_text_embeddings(prompt)
    text_embeddings_neg = get_text_embeddings(prompt, negative_prompt=negative_prompt)

    latents = generate_latents(seed)

    progress(0)
    images = generate_images(latents, text_embeddings, num_inference_steps)

    progress(0.5)
    images_neg = generate_images(latents, text_embeddings_neg, num_inference_steps)

    fname = "negative"
    tab_config = {
        "Tab": "Negative",
        "Prompt": prompt,
        "Negative Prompt": negative_prompt,
        "Number of Inference Steps per Image": num_inference_steps,
        "Seed": seed,
    }

    imgs_list = []
    imgs_list.append((images, "Without Negative Prompt"))
    imgs_list.append((images_neg, "With Negative Prompt"))
    export_as_zip(imgs_list, fname, tab_config)

    return images, images_neg, f"outputs/{fname}.zip"


__all__ = ["display_negative_images"]
src/pipelines/perturbations.py
ADDED
@@ -0,0 +1,62 @@
import torch
import numpy as np
import gradio as gr
from src.util.base import *
from src.util.params import *


def display_perturb_images(
    prompt,
    seed,
    num_inference_steps,
    num_images,
    perturbation_size,
    progress=gr.Progress(),
):
    text_embeddings = get_text_embeddings(prompt)

    latents_x = generate_latents(seed)
    scale_x = torch.cos(
        torch.linspace(0, 2, num_images) * torch.pi * perturbation_size / 4
    ).to(torch_device)
    noise_x = torch.tensordot(scale_x, latents_x, dims=0)

    progress(0)
    images = []
    images.append(
        (
            generate_images(latents_x, text_embeddings, num_inference_steps),
            "{}".format(1),
        )
    )

    for i in range(num_images):
        np.random.seed(i)
        progress(i / (num_images))
        latents_y = generate_latents(np.random.randint(0, 100000))
        scale_y = torch.sin(
            torch.linspace(0, 2, num_images) * torch.pi * perturbation_size / 4
        ).to(torch_device)
        noise_y = torch.tensordot(scale_y, latents_y, dims=0)

        noise = noise_x + noise_y
        image = generate_images(
            noise[num_images - 1], text_embeddings, num_inference_steps
        )
        images.append((image, "{}".format(i + 2)))

    fname = "perturbations"
    tab_config = {
        "Tab": "Perturbations",
        "Prompt": prompt,
        "Number of Perturbations": num_images,
        "Perturbation Size": perturbation_size,
        "Number of Inference Steps per Image": num_inference_steps,
        "Seed": seed,
    }
    export_as_zip(images, fname, tab_config)

    return images, f"outputs/{fname}.zip"


__all__ = ["display_perturb_images"]
src/pipelines/poke.py
ADDED
@@ -0,0 +1,83 @@
import os
import gradio as gr
from src.util.base import *
from src.util.params import *
from PIL import Image, ImageDraw


def visualize_poke(
    pokeX, pokeY, pokeHeight, pokeWidth, imageHeight=imageHeight, imageWidth=imageWidth
):
    if (
        (pokeX - pokeWidth // 2 < 0)
        or (pokeX + pokeWidth // 2 > imageWidth // 8)
        or (pokeY - pokeHeight // 2 < 0)
        or (pokeY + pokeHeight // 2 > imageHeight // 8)
    ):
        gr.Warning("Modification outside image")
    shape = [
        (pokeX * 8 - pokeWidth * 8 // 2, pokeY * 8 - pokeHeight * 8 // 2),
        (pokeX * 8 + pokeWidth * 8 // 2, pokeY * 8 + pokeHeight * 8 // 2),
    ]

    blank = Image.new("RGB", (imageWidth, imageHeight))

    if os.path.exists("outputs/original.png"):
        oImg = Image.open("outputs/original.png")
        pImg = Image.open("outputs/poked.png")
    else:
        oImg = blank
        pImg = blank

    oRec = ImageDraw.Draw(oImg)
    pRec = ImageDraw.Draw(pImg)

    oRec.rectangle(shape, outline="white")
    pRec.rectangle(shape, outline="white")

    return oImg, pImg


def display_poke_images(
    prompt,
    seed,
    num_inference_steps,
    poke=False,
    pokeX=None,
    pokeY=None,
    pokeHeight=None,
    pokeWidth=None,
    intermediate=False,
    progress=gr.Progress(),
):
    text_embeddings = get_text_embeddings(prompt)
    latents, modified_latents = generate_modified_latents(
        poke, seed, pokeX, pokeY, pokeHeight, pokeWidth
    )

    progress(0)
    images = generate_images(
        latents, text_embeddings, num_inference_steps, intermediate=intermediate
    )

    if not intermediate:
        images.save("outputs/original.png")

    if poke:
        progress(0.5)
        modImages = generate_images(
            modified_latents,
            text_embeddings,
            num_inference_steps,
            intermediate=intermediate,
        )

        if not intermediate:
            modImages.save("outputs/poked.png")
    else:
        modImages = None

    return images, modImages


__all__ = ["display_poke_images", "visualize_poke"]
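Note that the poke coordinates are interpreted in latent-space units (a 512x512 image maps to a 64x64 latent grid), which is why visualize_poke checks against imageWidth // 8 and scales everything by 8 before drawing. An illustrative calculation with hypothetical latent-space values:

# Hypothetical poke: centred at latent cell (32, 32), 16x16 latent cells in size.
pokeX, pokeY, pokeHeight, pokeWidth = 32, 32, 16, 16
shape = [
    (pokeX * 8 - pokeWidth * 8 // 2, pokeY * 8 - pokeHeight * 8 // 2),
    (pokeX * 8 + pokeWidth * 8 // 2, pokeY * 8 + pokeHeight * 8 // 2),
]
print(shape)  # [(192, 192), (320, 320)] -- a 128x128-pixel box centred in a 512x512 image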
src/pipelines/seed.py
ADDED
@@ -0,0 +1,32 @@
import gradio as gr
from src.util.base import *
from src.util.params import *


def display_seed_images(
    prompt, num_inference_steps, num_images, progress=gr.Progress()
):
    text_embeddings = get_text_embeddings(prompt)

    images = []
    progress(0)

    for i in range(num_images):
        progress(i / num_images)
        latents = generate_latents(i)
        image = generate_images(latents, text_embeddings, num_inference_steps)
        images.append((image, "{}".format(i + 1)))

    fname = "seeds"
    tab_config = {
        "Tab": "Seeds",
        "Prompt": prompt,
        "Number of Seeds": num_images,
        "Number of Inference Steps per Image": num_inference_steps,
    }
    export_as_zip(images, fname, tab_config)

    return images, f"outputs/{fname}.zip"


__all__ = ["display_seed_images"]
src/util/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .base import *
from .params import *
from .clip_config import *
src/util/base.py
ADDED
@@ -0,0 +1,304 @@
import io
import os
import torch
import zipfile
import numpy as np
import gradio as gr
from PIL import Image
from tqdm.auto import tqdm
from src.util.params import *
from src.util.clip_config import *
import matplotlib.pyplot as plt


def get_text_embeddings(
    prompt,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    torch_device=torch_device,
    batch_size=1,
    negative_prompt="",
):
    text_input = tokenizer(
        prompt,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
    )

    with torch.no_grad():
        text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]
    max_length = text_input.input_ids.shape[-1]
    uncond_input = tokenizer(
        [negative_prompt] * batch_size,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt",
    )
    with torch.no_grad():
        uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]
    text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

    return text_embeddings


def generate_latents(
    seed,
    height=imageHeight,
    width=imageWidth,
    torch_device=torch_device,
    unet=unet,
    batch_size=1,
):
    generator = torch.Generator().manual_seed(int(seed))

    latents = torch.randn(
        (batch_size, unet.config.in_channels, height // 8, width // 8),
        generator=generator,
    ).to(torch_device)

    return latents


def generate_modified_latents(
    poke,
    seed,
    pokeX=None,
    pokeY=None,
    pokeHeight=None,
    pokeWidth=None,
    imageHeight=imageHeight,
    imageWidth=imageWidth,
):
    original_latents = generate_latents(seed, height=imageHeight, width=imageWidth)
    if poke:
        np.random.seed(seed)
        poke_latents = generate_latents(
            np.random.randint(0, 100000), height=pokeHeight * 8, width=pokeWidth * 8
        )

        x_origin = pokeX - pokeWidth // 2
        y_origin = pokeY - pokeHeight // 2

        modified_latents = original_latents.clone()
        modified_latents[
            :, :, y_origin : y_origin + pokeHeight, x_origin : x_origin + pokeWidth
        ] = poke_latents
    else:
        modified_latents = None

    return original_latents, modified_latents


def convert_to_pil_image(image):
    image = (image / 2 + 0.5).clamp(0, 1)
    image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
    images = (image * 255).round().astype("uint8")
    pil_images = [Image.fromarray(image) for image in images]
    return pil_images[0]


def generate_images(
    latents,
    text_embeddings,
    num_inference_steps,
    unet=unet,
    guidance_scale=guidance_scale,
    vae=vae,
    scheduler=scheduler,
    intermediate=False,
    progress=gr.Progress(),
):
    scheduler.set_timesteps(num_inference_steps)
    latents = latents * scheduler.init_noise_sigma
    images = []
    i = 1

    for t in tqdm(scheduler.timesteps):
        latent_model_input = torch.cat([latents] * 2)
        latent_model_input = scheduler.scale_model_input(latent_model_input, t)

        with torch.no_grad():
            noise_pred = unet(
                latent_model_input, t, encoder_hidden_states=text_embeddings
            ).sample

        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
        noise_pred = noise_pred_uncond + guidance_scale * (
            noise_pred_text - noise_pred_uncond
        )

        if intermediate:
            progress(((1000 - t) / 1000))
            Latents = 1 / 0.18215 * latents
            with torch.no_grad():
                image = vae.decode(Latents).sample
            images.append((convert_to_pil_image(image), "{}".format(i)))

        latents = scheduler.step(noise_pred, t, latents).prev_sample
        i += 1

    if not intermediate:
        Latents = 1 / 0.18215 * latents
        with torch.no_grad():
            image = vae.decode(Latents).sample
        images = convert_to_pil_image(image)

    return images


def get_word_embeddings(
    prompt, tokenizer=tokenizer, text_encoder=text_encoder, torch_device=torch_device
):
    text_input = tokenizer(
        prompt,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
    ).to(torch_device)

    with torch.no_grad():
        text_embeddings = text_encoder(text_input.input_ids)[0].reshape(1, -1)

    text_embeddings = text_embeddings.cpu().numpy()
    return text_embeddings / np.linalg.norm(text_embeddings)


def get_concat_embeddings(names, merge=False):
    embeddings = []

    for name in names:
        embedding = get_word_embeddings(name)
        embeddings.append(embedding)

    embeddings = np.vstack(embeddings)

    if merge:
        embeddings = np.average(embeddings, axis=0).reshape(1, -1)

    return embeddings


def get_axis_embeddings(A, B):
    emb = []

    for a, b in zip(A, B):
        e = get_word_embeddings(a) - get_word_embeddings(b)
        emb.append(e)

    emb = np.vstack(emb)
    ax = np.average(emb, axis=0).reshape(1, -1)

    return ax


def calculate_residual(
    axis, axis_names, from_words=None, to_words=None, residual_axis=1
):
    axis_indices = [0, 1, 2]
    axis_indices.remove(residual_axis)

    if axis_names[axis_indices[0]] in axis_combinations:
        fembeddings = get_concat_embeddings(
            axis_combinations[axis_names[axis_indices[0]]], merge=True
        )
    else:
        axis_combinations[axis_names[axis_indices[0]]] = from_words + to_words
        fembeddings = get_concat_embeddings(from_words + to_words, merge=True)

    if axis_names[axis_indices[1]] in axis_combinations:
        sembeddings = get_concat_embeddings(
            axis_combinations[axis_names[axis_indices[1]]], merge=True
        )
    else:
        axis_combinations[axis_names[axis_indices[1]]] = from_words + to_words
        sembeddings = get_concat_embeddings(from_words + to_words, merge=True)

    fprojections = fembeddings @ axis[axis_indices[0]].T
    sprojections = sembeddings @ axis[axis_indices[1]].T

    partial_residual = fembeddings - (fprojections.reshape(-1, 1) * fembeddings)
    residual = partial_residual - (sprojections.reshape(-1, 1) * sembeddings)

    return residual


def calculate_step_size(num_images, differentiation):
    return differentiation / (num_images - 1)


def generate_seed_vis(seed):
    np.random.seed(seed)
    emb = np.random.rand(15)
    plt.close()
    plt.switch_backend("agg")
    plt.figure(figsize=(10, 0.5))
    plt.imshow([emb], cmap="viridis")
    plt.axis("off")
    return plt


def export_as_gif(images, filename, frames_per_second=2, reverse=False):
    imgs = [img[0] for img in images]

    if reverse:
        imgs += imgs[2:-1][::-1]

    imgs[0].save(
        f"outputs/{filename}",
        format="GIF",
        save_all=True,
        append_images=imgs[1:],
        duration=1000 // frames_per_second,
        loop=0,
    )


def export_as_zip(images, fname, tab_config=None):
    if not os.path.exists(f"outputs/{fname}.zip"):
        os.makedirs("outputs", exist_ok=True)

    with zipfile.ZipFile(f"outputs/{fname}.zip", "w") as img_zip:
        if tab_config:
            with open("outputs/config.txt", "w") as f:
                for key, value in tab_config.items():
                    f.write(f"{key}: {value}\n")
                f.close()

            img_zip.write("outputs/config.txt", "config.txt")

        for idx, img in enumerate(images):
            buff = io.BytesIO()
            img[0].save(buff, format="PNG")
            buff = buff.getvalue()
            max_num = len(images)
            num_leading_zeros = len(str(max_num))
            img_name = f"{{:0{num_leading_zeros}}}.png"
            img_zip.writestr(img_name.format(idx + 1), buff)


def read_html(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    return content


__all__ = [
    "get_text_embeddings",
    "generate_latents",
    "generate_modified_latents",
    "generate_images",
    "get_word_embeddings",
    "get_concat_embeddings",
    "get_axis_embeddings",
    "calculate_residual",
    "calculate_step_size",
    "generate_seed_vis",
    "export_as_gif",
    "export_as_zip",
    "read_html",
]
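The denoising loop in generate_images uses classifier-free guidance: the UNet runs on a doubled batch (unconditional and text-conditioned embeddings), and the two predictions are combined as eps = eps_uncond + guidance_scale * (eps_text - eps_uncond), so guidance_scale = 1 reproduces the purely conditional prediction and larger values push the sample harder toward the prompt. A toy illustration of that combination step only:

import torch

eps_uncond = torch.zeros(4)  # stand-in unconditional noise prediction
eps_text = torch.ones(4)     # stand-in text-conditioned noise prediction
for g in (1, 8, 20):
    print(g, eps_uncond + g * (eps_text - eps_uncond))  # moves linearly away from eps_uncond as g grows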
src/util/clip_config.py
ADDED
@@ -0,0 +1,114 @@
masculine = [
    "man",
    "king",
    "prince",
    "husband",
    "father",
]

feminine = [
    "woman",
    "queen",
    "princess",
    "wife",
    "mother",
]

young = [
    "man",
    "woman",
    "king",
    "queen",
    "father",
]

old = [
    "boy",
    "girl",
    "prince",
    "princess",
    "son",
]

common = [
    "man",
    "woman",
    "boy",
    "girl",
    "woman",
]

elite = [
    "king",
    "queen",
    "prince",
    "princess",
    "duchess",
]

singular = [
    "boy",
    "girl",
    "cat",
    "puppy",
    "computer",
]

plural = [
    "boys",
    "girls",
    "cats",
    "puppies",
    "computers",
]

examples = [
    "king",
    "queen",
    "man",
    "woman",
    "boys",
    "girls",
    "apple",
    "orange",
]

axis_names = ["gender", "residual", "age"]

axis_combinations = {
    "age": young + old,
    "gender": masculine + feminine,
    "royalty": common + elite,
    "number": singular + plural,
}

axisMap = {
    "X - Axis": 0,
    "Y - Axis": 1,
    "Z - Axis": 2,
}

whichAxisMap = {
    "which_axis_1": "X - Axis",
    "which_axis_2": "Z - Axis",
    "which_axis_3": "Y - Axis",
    "which_axis_4": "---",
    "which_axis_5": "---",
    "which_axis_6": "---",
}

__all__ = [
    "axisMap",
    "whichAxisMap",
    "axis_names",
    "axis_combinations",
    "examples",
    "masculine",
    "feminine",
    "young",
    "old",
    "common",
    "elite",
    "singular",
    "plural",
]
src/util/params.py
ADDED
@@ -0,0 +1,96 @@
import torch
import secrets
from gradio.networking import setup_tunnel
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import (
    AutoencoderKL,
    UNet2DConditionModel,
    LCMScheduler,
    DDIMScheduler,
    StableDiffusionPipeline,
)

torch_device = "cuda" if torch.cuda.is_available() else "cpu"

isLCM = False
HF_ACCESS_TOKEN = ""

model_path = "segmind/small-sd"
inpaint_model_path = "Lykon/dreamshaper-8-inpainting"
prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
promptA = "Self-portrait oil painting, a beautiful man with golden hair, 8k"
promptB = "Self-portrait oil painting, a beautiful woman with golden hair, 8k"
negative_prompt = "a photo frame"

num_images = 5
degree = 360
perturbation_size = 0.1
num_inference_steps = 8
seed = 69420

guidance_scale = 8
guidance_values = "1, 8, 20"
intermediate = True
pokeX, pokeY = 256, 256
pokeHeight, pokeWidth = 128, 128
imageHeight, imageWidth = 512, 512

tokenizer = CLIPTokenizer.from_pretrained(model_path, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(model_path, subfolder="text_encoder").to(
    torch_device
)

if isLCM:
    scheduler = LCMScheduler.from_pretrained(model_path, subfolder="scheduler")
else:
    scheduler = DDIMScheduler.from_pretrained(model_path, subfolder="scheduler")

unet = UNet2DConditionModel.from_pretrained(model_path, subfolder="unet").to(
    torch_device
)
vae = AutoencoderKL.from_pretrained(model_path, subfolder="vae").to(torch_device)

pipe = StableDiffusionPipeline(
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    unet=unet,
    scheduler=scheduler,
    vae=vae,
    safety_checker=None,
    feature_extractor=None,
    requires_safety_checker=False,
).to(torch_device)

dash_tunnel = setup_tunnel("0.0.0.0", 8000, secrets.token_urlsafe(32))

__all__ = [
    "prompt",
    "negative_prompt",
    "num_images",
    "degree",
    "perturbation_size",
    "num_inference_steps",
    "seed",
    "intermediate",
    "pokeX",
    "pokeY",
    "pokeHeight",
    "pokeWidth",
    "promptA",
    "promptB",
    "tokenizer",
    "text_encoder",
    "scheduler",
    "unet",
    "vae",
    "torch_device",
    "imageHeight",
    "imageWidth",
    "guidance_scale",
    "guidance_values",
    "HF_ACCESS_TOKEN",
    "model_path",
    "inpaint_model_path",
    "dash_tunnel",
    "pipe",
]