import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
import spaces

def compile_model():
    # Load the model
    model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
    pipe = StableVideoDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
    pipe.to('cuda')
    
    @spaces.GPU(duration=1500)  # Long duration window for ahead-of-time (AoT) compilation at startup
    def compile_unet():
        # Run the pipeline once so aoti_capture can record the example
        # inputs that flow through the UNet (SVD's denoiser is a UNet, not a transformer)
        with spaces.aoti_capture(pipe.unet) as call:
            image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png")
            pipe(image)
        
        # Export and compile
        exported = torch.export.export(
            pipe.unet,
            args=call.args,
            kwargs=call.kwargs,
        )
        return spaces.aoti_compile(exported)
    
    compiled_unet = compile_unet()
    spaces.aoti_apply(compiled_unet, pipe.unet)
    return pipe

@spaces.GPU  # Inference also needs a GPU slice on ZeroGPU Spaces
def generate_video(prompt: str, pipe):
    # For simplicity, use a placeholder image; a real app would first generate
    # an image from the text prompt (e.g. with a text-to-image model)
    image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png")  # Placeholder

    # Generate video frames (decode in chunks to limit peak VRAM usage)
    frames = pipe(image, decode_chunk_size=8).frames[0]

    # Save the frames as an MP4 (abs() keeps the hash-based filename clean)
    video_path = f"/tmp/generated_video_{abs(hash(prompt))}.mp4"
    export_to_video(frames, video_path, fps=7)
    
    return video_path
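
# A minimal usage sketch (an assumption, not part of the original file):
# wiring the two functions into a Gradio app, as is typical for a ZeroGPU
# Space. Compilation runs once at startup; each request then reuses the
# compiled pipeline.
if __name__ == "__main__":
    import gradio as gr

    pipe = compile_model()  # compile once at startup

    demo = gr.Interface(
        fn=lambda prompt: generate_video(prompt, pipe),
        inputs=gr.Textbox(label="Prompt"),
        outputs=gr.Video(label="Generated video"),
    )
    demo.launch()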