Commit 609badf
Parent(s): 1ad8665

Fix pipeline

Files changed:
- .gitignore +2 -1
- bapp.py +2 -1
- requirements.txt +1 -0
- test_video.py +1 -1
- visual_anagrams/animate.py +23 -20
- visual_anagrams/samplers.py +5 -4
.gitignore
CHANGED

@@ -1,4 +1,5 @@
 env/
 __pycache__/
 *.png
-*.mp4
+*.mp4
+*.gif
bapp.py
CHANGED

@@ -75,12 +75,13 @@ def generate_content(
 choices = list(VIEW_MAP_NAMES.keys())
 gradio_app = gr.Interface(
     fn=generate_content,
+    title="Multi-View Illusion Diffusion",
     inputs=[
         gr.Textbox(label="Style", placeholder="an oil painting of"),
         gr.Textbox(label="Prompt for original view", placeholder="a dress"),
         gr.Textbox(label="Prompt for transformed view", placeholder="an old man"),
         gr.Dropdown(label="View transformation", choices=choices, value=choices[0]),
-        gr.Number(label="Number of diffusion steps", value=
+        gr.Number(label="Number of diffusion steps", value=50, step=1, minimum=1, maximum=300),
         gr.Number(label="Random seed", value=0, step=1, minimum=0, maximum=100000)
     ],
     outputs=[gr.Video(label="Illusion"), gr.Image(label="Original"), gr.Image(label="Transformed")],
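Note on the new number inputs: gr.Number delivers its value to the callback as a Python float by default (even with step=1), which is presumably why the sampler changes below wrap num_inference_steps in int(). A minimal sketch of that behavior, with a hypothetical echo_steps callback; setting precision=0 on gr.Number is an alternative way to get an integer directly:

import gradio as gr

def echo_steps(steps):
    # By default gr.Number passes a float (e.g. 50.0), so integer-only
    # consumers such as scheduler.set_timesteps need an explicit cast.
    return int(steps)

demo = gr.Interface(
    fn=echo_steps,
    inputs=gr.Number(label="Number of diffusion steps", value=50, step=1, minimum=1, maximum=300),
    outputs=gr.Number(label="Steps as int"),
)

if __name__ == "__main__":
    demo.launch()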
requirements.txt
CHANGED

@@ -7,6 +7,7 @@ imageio
 imageio[ffmpeg]
 imageio[pyav]
 opencv-python
+pygifsicle
 safetensors
 sentencepiece
 transformers
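pygifsicle is only a thin Python wrapper around the gifsicle command-line tool, so the system binary also has to be available in the Space (on Hugging Face Spaces this is usually done by listing gifsicle in packages.txt). A small sketch of a runtime check, assuming the GIF path tmp.gif already exists:

import shutil
from pygifsicle import optimize

# pygifsicle shells out to gifsicle; fail early if the binary is missing.
if shutil.which("gifsicle") is None:
    raise RuntimeError("gifsicle not found on PATH; install it, e.g. apt-get install gifsicle")

optimize("tmp.gif")  # rewrites tmp.gif in place with a smaller encoding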
test_video.py
CHANGED

@@ -7,5 +7,5 @@ if __name__ == "__main__":
         get_views(["identity", "flip"])[1],
         "a painting of vases",
         "a painting of a sloth",
-        save_video_path="
+        save_video_path="tmp.mp4",
     )
visual_anagrams/animate.py
CHANGED

@@ -1,8 +1,9 @@
 import cv2
 from tqdm import tqdm
 import numpy as np
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw, ImageFont, ImageChops
 import imageio
+from pygifsicle import optimize

 import torchvision.transforms.functional as TF

@@ -14,11 +15,12 @@ def draw_text(image, text, fill=(0,0,0), frame_size=384, im_size=256):
     image = image.copy()

     # Font info
+    font_path = get_courier_font_path()
     font_size = 16

     # Make PIL objects
     draw = ImageDraw.Draw(image)
-    font = ImageFont.
+    font = ImageFont.truetype(font_path, font_size)

     # Center text horizontally, and vertically between
     # illusion bottom and frame bottom

@@ -50,9 +52,9 @@ def animate_two_view(
     prompt_1,
     prompt_2,
     save_video_path='tmp.mp4',
-    hold_duration=
+    hold_duration=60,
     text_fade_duration=10,
-    transition_duration=
+    transition_duration=80,
     im_size=256,
     frame_size=384,
 ):

@@ -114,22 +116,23 @@ def animate_two_view(

     # Move last bit of clip to front
     frames = frames[-hold_duration//2:] + frames[:-hold_duration//2]
-    [16 removed lines: content not shown in this view]
+    images = frames
+
+    processed_frames = [images[0]]
+
+    for i in range(1, len(images)):
+        # Calculate the difference between current and previous frame
+        diff = ImageChops.difference(images[i], images[i - 1])
+        # Create a mask to isolate changes
+        mask = diff.convert("L").point(lambda x: 0 if x < 5 else 255, "1")
+        # Apply the mask to the current frame
+        new_frame = ImageChops.composite(images[i], processed_frames[-1], mask)
+        processed_frames.append(new_frame)
+
+    # Save the frames as a GIF
+    imageio.mimsave(save_video_path,
+                    [np.array(frame) for frame in processed_frames],
+                    fps=30)
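The new loop is a common GIF-shrinking trick: pixels that change by less than a small threshold between consecutive frames are copied from the previous processed frame, so large regions stay byte-identical and compress well. The hunk imports optimize from pygifsicle but does not call it in the lines shown; a hedged sketch of how the same pieces could be wired together as a standalone helper (write_optimized_gif and its defaults are illustrative, not part of the commit):

import imageio
import numpy as np
from PIL import ImageChops
from pygifsicle import optimize

def write_optimized_gif(frames, path="tmp.gif", fps=30, threshold=5):
    """Reuse near-unchanged pixels across frames, then write and shrink a GIF.

    frames: list of same-size PIL Images. threshold: per-pixel difference
    below which a pixel is treated as unchanged (mirrors the commit's value of 5).
    """
    processed = [frames[0]]
    for prev, cur in zip(frames, frames[1:]):
        # Difference against the previous raw frame, as in the commit
        diff = ImageChops.difference(cur, prev)
        # 1-bit mask: 255 where the pixel really changed, 0 where it did not
        mask = diff.convert("L").point(lambda x: 0 if x < threshold else 255, "1")
        # Changed pixels come from the current frame, the rest from the last processed frame
        processed.append(ImageChops.composite(cur, processed[-1], mask))

    imageio.mimsave(path, [np.array(f) for f in processed], fps=fps)
    optimize(path)  # needs the gifsicle binary on PATH
    return path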
visual_anagrams/samplers.py
CHANGED

@@ -30,7 +30,7 @@ def sample_stage_1(model,
     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

     # Setup timesteps
-    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    model.scheduler.set_timesteps(int(num_inference_steps), device=device)
     timesteps = model.scheduler.timesteps

     # Make intermediate_images

@@ -45,7 +45,7 @@ def sample_stage_1(model,
     )
     # ic(noisy_images.shape)

-    for i, t in tqdm(
+    for i, t in enumerate(tqdm(timesteps)):
         # Apply views to noisy_image
         viewed_noisy_images = []
         for view_fn in views:

@@ -109,6 +109,7 @@ def sample_stage_1(model,
     # ic(noise_pred.shape)

     # ic(t.shape)
+    # ic(t.dtype)
     # compute the previous noisy sample x_t -> x_t-1
     noisy_images = model.scheduler.step(
         noise_pred.to('cuda'), t, noisy_images.to('cuda'), generator=generator, return_dict=False

@@ -148,7 +149,7 @@ def sample_stage_2(model,
     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

     # Get timesteps
-    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    model.scheduler.set_timesteps(int(num_inference_steps), device=device)
     timesteps = model.scheduler.timesteps

     num_channels = model.unet.config.in_channels // 2

@@ -236,7 +237,7 @@ def sample_stage_2(model,

     # compute the previous noisy sample x_t -> x_t-1
     noisy_images = model.scheduler.step(
-        noise_pred, t, noisy_images, generator=generator, return_dict=False
+        noise_pred.to('cuda'), t, noisy_images.to('cuda'), generator=generator, return_dict=False
     )[0]

     # Return denoised images
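Both sampler fixes are input normalization: set_timesteps gets an explicit int (the value arrives from gr.Number as a float), and scheduler.step now receives noise_pred and noisy_images moved onto the same CUDA device as the rest of the pipeline. A minimal sketch of that pattern with a hypothetical helper; the device default is an assumption and the tensors are placeholders:

import torch

def normalize_step_inputs(num_inference_steps, noise_pred, noisy_images, device="cuda"):
    # The step count may arrive as a float (e.g. 50.0 from gr.Number); schedulers expect an int.
    steps = int(num_inference_steps)
    # scheduler.step does tensor math between the model output and the latents,
    # so both must live on the same device.
    return steps, noise_pred.to(device), noisy_images.to(device)

# Illustrative usage on CPU so the sketch runs without a GPU:
steps, pred, latents = normalize_step_inputs(
    50.0, torch.randn(2, 3, 64, 64), torch.randn(2, 3, 64, 64), device="cpu"
)
assert isinstance(steps, int) and pred.device == latents.device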