sejalkishan committed on
Commit
a41053d
·
verified ·
1 Parent(s): a0ecedc

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (1).py +120 -0
  2. requirements (1).txt +12 -0
app (1).py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import torch
3
+ import gradio as gr
4
+ from diffusers import CogVideoXPipeline
5
+ from diffusers.utils import export_to_video
6
+ from PIL import Image
7
+
8
+ # ────────────────────────────────────────────────────────────
9
+ # 1. Load & optimize the CogVideoX pipeline with CPU offload
10
+ # ────────────────────────────────────────────────────────────
11
# Load the CogVideoX text-to-video pipeline once at import time; the
# module-level `pipe` is what generate_video() calls for every request.
_load_options = {"torch_dtype": torch.bfloat16}
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX1.5-5B", **_load_options)

# Memory savers: let submodules move between CPU and GPU automatically, and
# slice the VAE work for additional VRAM savings.
pipe.enable_model_cpu_offload()
pipe.vae.enable_slicing()
17
+
18
+ # ────────────────────────────────────────────────────────────
19
+ # 2. Resolution parsing & sanitization
20
+ # ────────────────────────────────────────────────────────────
21
def make_divisible_by_8(x: int) -> int:
    """Round ``x`` down to the nearest multiple of 8.

    Values below 8 (but non-negative) therefore collapse to 0.
    """
    remainder = x % 8
    return x - remainder
23
+
24
def parse_resolution(res_str: str):
    """
    Convert a label such as "480p" into a ``(height, width)`` tuple.

    The width is derived from the height using a 16:9 ratio, and both values
    are rounded down to multiples of 8 via ``make_divisible_by_8``.

    Surrounding whitespace and the case of the trailing "p" are ignored, so
    "480p", "480P" and " 480p " are all accepted.

    Raises:
        ValueError: if the label does not contain an integer height, or if
            the height is smaller than 8 (which would round to a zero-sized
            dimension).
    """
    label = res_str.strip().lower().rstrip("p")
    try:
        h = int(label)
    except ValueError:
        raise ValueError(
            f"Unrecognized resolution {res_str!r}; expected a value such as '480p'"
        ) from None

    if h < 8:
        # Anything below 8 would be rounded down to 0 and yield an empty frame.
        raise ValueError(
            f"Resolution {res_str!r} is too small; height must be at least 8"
        )

    # Integer arithmetic avoids a float round-trip when deriving the 16:9 width.
    w = h * 16 // 9
    return make_divisible_by_8(h), make_divisible_by_8(w)
32
+
33
+ # ────────────────────────────────────────────────────────────
34
+ # 3. GPU‑decorated video generation function
35
+ # ────────────────────────────────────────────────────────────
36
@spaces.GPU(duration=180)  # allow up to 180s of GPU time
def generate_video(
    prompt: str,
    steps: int,
    frames: int,
    fps: int,
    resolution: str
) -> str:
    """
    Generate a video for ``prompt`` and return the path of the exported MP4.

    Args:
        prompt: Text description passed to the diffusion pipeline.
        steps: Number of inference steps.
        frames: Number of frames requested from the pipeline.
        fps: Frame rate used when exporting the video.
        resolution: Label such as "480p"; the generated frames are resized
            to the size returned by ``parse_resolution`` for this label.

    Returns:
        Path to the exported video file, as returned by ``export_to_video``.
    """
    import tempfile

    # 3.1 Determine the output resolution the frames will be resized to
    target_h, target_w = parse_resolution(resolution)

    # 3.2 Run the diffusion pipeline (no height/width is passed, so the
    #     pipeline picks its own size). UI values are coerced to int in case
    #     the sliders deliver them as floats.
    output = pipe(
        prompt=prompt,
        num_inference_steps=int(steps),
        num_frames=int(frames),
    )
    video_frames = output.frames[0]  # frames of the first (only) video

    # 3.3 Resize frames to the user-specified resolution
    resized_frames = [
        frame.resize((target_w, target_h), Image.LANCZOS)
        for frame in video_frames
    ]

    # 3.4 Export to a unique temporary MP4 so overlapping requests never
    #     overwrite each other's output (previously a fixed "generated.mp4").
    #     delete=False keeps the file on disk after the handle is closed so
    #     the caller can still read it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        out_path = tmp.name
    video_path = export_to_video(resized_frames, out_path, fps=int(fps))
    return video_path
64
+
65
+ # ────────────────────────────────────────────────────────────
66
+ # 4. Build the Gradio interface with interactive controls
67
+ # ────────────────────────────────────────────────────────────
68
with gr.Blocks(title="Textual Imagination: A text to video synthesis") as demo:
    # Page header.
    gr.Markdown(
        """
        # 🎞️ Textual Imagination: A text to video synthesis
        Generate videos from text prompts.
        Adjust inference steps, frame count, fps, and resolution below.
        """
    )
    with gr.Row():
        # First column: prompt, generation controls and the trigger button.
        with gr.Column():
            prompt_input = gr.Textbox(lines=2, label="Prompt")
            steps_slider = gr.Slider(
                label="Inference Steps",
                value=50, minimum=1, maximum=100, step=1,
            )
            frames_slider = gr.Slider(
                label="Total Frames",
                value=161, minimum=16, maximum=320, step=1,
            )
            fps_slider = gr.Slider(
                label="Frames per Second (FPS)",
                value=16, minimum=1, maximum=60, step=1,
            )
            res_dropdown = gr.Dropdown(
                label="Resolution",
                value="480p",
                choices=["360p", "480p", "720p", "1080p"],
            )
            gen_button = gr.Button("Generate Video")
        # Second column: the rendered result.
        with gr.Column():
            video_output = gr.Video(format="mp4", label="Generated Video")

    # Wire the button to generate_video; the list order matches the
    # function's positional parameters.
    ui_inputs = [
        prompt_input,
        steps_slider,
        frames_slider,
        fps_slider,
        res_dropdown,
    ]
    gen_button.click(fn=generate_video, inputs=ui_inputs, outputs=video_output)
111
+
112
+ # ────────────────────────────────────────────────────────────
113
+ # 5. Launch: disable SSR so Gradio blocks and stays alive
114
+ # ────────────────────────────────────────────────────────────
115
if __name__ == "__main__":
    # Listen on every interface at port 7860, with SSR mode turned off.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)
requirements (1).txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ diffusers>=0.30.1
3
+ transformers>=4.44.0
4
+ accelerate>=0.33.0
5
+ torch>=2.0.1
6
+ sentencepiece
7
+ gradio
8
+ imageio>=2.31.6
9
+ imageio-ffmpeg>=0.5.1
10
+ opencv-python>=4.9.0.0
11
+
12
+