# Page-header residue captured together with the source (not program code):
# jbilcke-hf's picture
# Update app.py
# 267b2c4
# raw
# history blame
# 6.53 kB
# Module setup. Everything below runs at import time and is order-dependent:
# the repository is cloned, put on sys.path, and only then imported from.
import os
# Clone the frame-interpolation repository into the working directory.
# The return code of os.system is not checked, so a failed clone only
# surfaces later, at the `from eval import ...` line.
os.system("git clone https://github.com/google-research/frame-interpolation")
import sys
# Make the cloned checkout importable (presumably where the `eval` package
# imported further down lives).
sys.path.append("frame-interpolation")
import cv2
import numpy as np
import tensorflow as tf
import mediapy
from PIL import Image
import base64
import gradio as gr
import tempfile
from huggingface_hub import snapshot_download
from image_tools.sizes import resize_and_crop
# Star import: as far as this file shows, this is what brings VideoFileClip
# and ImageSequenceClip into scope.
from moviepy.editor import *
# Fetch the model repository from the Hugging Face Hub; the result is handed
# straight to the Interpolator constructor below.
model = snapshot_download(repo_id="akhaliq/frame-interpolation-film-style")
from eval import interpolator, util
# NOTE: this rebinds the name `interpolator` from the imported module to an
# Interpolator instance; the module is no longer reachable under that name.
# The meaning of the second argument (None) is defined by the
# frame-interpolation repository, not by this file.
interpolator = interpolator.Interpolator(model, None)
# Point mediapy at the ffmpeg binary located by the frame-interpolation utils.
ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)
# Shared secret checked by infer(). When the SECRET_TOKEN environment
# variable is unset, the fallback 'default_secret' is used.
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
def base64_to_video(base64_string, output_file):
    """Decode a base64-encoded video and write the raw bytes to *output_file*.

    Both a bare base64 payload and a data URI of the form
    ``data:<mime>;base64,<payload>`` are accepted. Without the prefix
    handling, ``base64.b64decode`` would silently drop the ``:``, ``;`` and
    ``,`` characters and decode the remaining header letters as data,
    producing a corrupt file.

    Args:
        base64_string: Base64 text (``str`` or ``bytes``), optionally
            prefixed with a ``data:`` URI header when given as ``str``.
        output_file: Path of the file to create or overwrite.

    Raises:
        binascii.Error: If the payload is not valid base64 (e.g. bad padding).
    """
    payload = base64_string
    if isinstance(payload, str) and payload.startswith("data:"):
        # "data:" can never start a valid base64 payload (':' is outside the
        # alphabet), so dropping everything up to the first comma is safe.
        payload = payload.split(",", 1)[-1]
    video_data = base64.b64decode(payload)
    with open(output_file, 'wb') as f:
        f.write(video_data)
def do_interpolation(frame1, frame2, times_to_interpolate):
    """Interpolate between two frame files and save the result as an MP4.

    The two paths are passed to ``util.interpolate_recursively_from_files``
    together with the module-level ``interpolator``; the frames it yields
    are collected and written with mediapy at 12 fps.

    Args:
        frame1: Path of the first frame image.
        frame2: Path of the second frame image.
        times_to_interpolate: Forwarded unchanged to the interpolation helper.

    Returns:
        The path of the written video, ``"<frame1>_to_<frame2>_out.mp4"``.
    """
    print(frame1, frame2)
    generated = util.interpolate_recursively_from_files(
        [frame1, frame2], times_to_interpolate, interpolator)
    # Materialise the frames before writing, exactly one pass over the helper.
    all_frames = list(generated)
    out_path = f"{frame1}_to_{frame2}_out.mp4"
    mediapy.write_video(out_path, all_frames, fps=12)
    return out_path
def get_frames(video_in, step, name):
    """Re-encode *video_in* and dump every frame of the result as a JPEG.

    The input is first rewritten to ``video_resized.mp4`` (frame rate capped
    at 30 fps, 12000k bitrate, resolution untouched), then read back with
    OpenCV and saved frame by frame as ``"<name>_<step><index>.jpg"`` in the
    current working directory.

    Args:
        video_in: Path of the video to split.
        step: Label inserted in each frame file name, before the index.
        name: Prefix of each frame file name.

    Returns:
        A ``(frames, fps)`` tuple: the list of written JPEG paths in order,
        and the frame rate OpenCV reports for the re-encoded video.
    """
    clip = VideoFileClip(video_in)
    try:
        # check fps
        if clip.fps > 30:
            print("vide rate is over 30, resetting to 30")
            target_fps = 30
        else:
            print("video rate is OK")
            target_fps = clip.fps
        # note: the clip is deliberately not resized any more, resizing to a
        # fixed height prevented working with portrait videos
        clip.write_videofile("video_resized.mp4", fps=target_fps, bitrate="12000k")
    finally:
        # Release the reader held by the clip, even if the re-encode failed.
        clip.close()
    print("video re-encoded (original resolution kept)")

    # Opens the re-encoded video file with CV2
    cap = cv2.VideoCapture("video_resized.mp4")
    frames = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        print("video fps: " + str(fps))
        i = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # we could use png to avoid any compression artifact, but it takes
            # much more space, so JPEG quality is bumped from 95 to 98 instead
            frame_path = f"{name}_{step}{i}.jpg"
            cv2.imwrite(frame_path, frame, [int(cv2.IMWRITE_JPEG_QUALITY), 98])
            frames.append(frame_path)
            i += 1
    finally:
        # Always free the capture handle, including when a read/write raised.
        cap.release()
    cv2.destroyAllWindows()
    print("broke the video into frames")
    return frames, fps
def create_video(frames, fps, type):
    """Assemble image files into ``"<type>_result.mp4"`` and return that path.

    Args:
        frames: Ordered image paths handed to ``ImageSequenceClip``.
        fps: Frame rate used both for the clip and for the written file.
        type: Prefix of the output file name.

    Returns:
        The path of the written video file.
    """
    print("building video result")
    sequence = ImageSequenceClip(frames, fps=fps)
    out_path = type + "_result.mp4"
    sequence.write_videofile(out_path, fps=fps, bitrate="12000k")
    return out_path
def infer(secret_token, video_in_base64, interpolation, fps_output):
    """Interpolate a base64-encoded video and return it as a base64 data URI.

    Pipeline: check the token, decode the upload to ``video_in.mp4``, split
    it into frames, interpolate between each pair of consecutive frames
    (at most the first 4 seconds of input), rebuild a video at *fps_output*
    and encode it.

    NOTE: the input is always written to the fixed path ``video_in.mp4`` in
    the working directory, so overlapping requests would overwrite each
    other's input.

    Args:
        secret_token: Token supplied by the caller; must equal SECRET_TOKEN.
        video_in_base64: The input video, base64-encoded.
        interpolation: Forwarded to ``do_interpolation`` as the number of
            interpolation passes.
        fps_output: Frame rate of the generated video.

    Returns:
        ``"data:video/mp4;base64,<payload>"`` for the interpolated video.

    Raises:
        gr.Error: If the token is wrong or no frame could be read from the
            input video.
    """
    import hmac  # local import: only needed for the token check below

    # Constant-time comparison, so response timing does not leak how much of
    # the token matched. A non-string token (e.g. None) is rejected outright.
    token_ok = isinstance(secret_token, str) and hmac.compare_digest(
        secret_token.encode("utf-8"), SECRET_TOKEN.encode("utf-8"))
    if not token_ok:
        raise gr.Error('Invalid secret token. Please fork the original space if you want to use it for yourself.')

    # Decode the base64 string to a video file
    video_in = "video_in.mp4"
    base64_to_video(video_in_base64, video_in)

    # 1. break video into frames and get FPS
    frames_list, fps = get_frames(video_in, "vid_input_frame", "origin")
    print(f"ORIGIN FPS: {fps}")
    if not frames_list:
        # Previously this fell through to an IndexError on an empty list.
        raise gr.Error("Could not read any frame from the input video.")

    n_frame = int(4 * fps)  # limited to 4 seconds
    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)
    # Keep at least one frame even if the reported fps is 0.
    n_frame = max(n_frame, 1)

    # 2. interpolate between consecutive frames of the kept range.
    # Only pairs that lie entirely inside frames_list[:n_frame] are used:
    # going one pair further and then appending frame n_frame-1 (as the
    # closing frame) would make the output end with a jump back in time.
    result_frames = []
    for idx in range(n_frame - 1):
        frame = frames_list[idx]
        next_frame = frames_list[idx + 1]
        interpolated_video = do_interpolation(frame, next_frame, interpolation)
        interpolated_frames, _ = get_frames(interpolated_video, "interpol", f"{idx}_")
        print(interpolated_frames)
        # The last extracted frame duplicates next_frame, which opens the
        # following segment (or closes the video), so it is left out here.
        for j, img in enumerate(interpolated_frames[:-1]):
            target = f"{frame}_to_{next_frame}_{j}.jpg"
            os.rename(img, target)
            result_frames.append(target)
        print("frames " + str(idx) + " & " + str(idx+1) + "/" + str(n_frame) + ": done;")

    # Close the sequence with the last original frame of the kept range.
    result_frames.append(frames_list[n_frame - 1])

    final_vid = create_video(result_frames, fps_output, "interpolated")

    # Convert video to base64
    with open(final_vid, "rb") as video_file:
        encoded_string = base64.b64encode(video_file.read()).decode('utf-8')
    return f"data:video/mp4;base64,{encoded_string}"
title = """test space"""

# The page is covered by a full-screen notice; the components underneath
# exist so that the click handler exposes infer() with these inputs/outputs.
with gr.Blocks() as demo:
    gr.HTML("""
<div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
<div style="text-align: center; color: black;">
<p style="color: black;">This UI-less space is a REST API to programmatically interpolate MP4s.</p>
</div>
</div>""")

    # Inputs, in the order infer() expects them.
    secret_token = gr.Textbox(label="Secret token")
    video_input = gr.Textbox(label="Video Base64")
    interpolation = gr.Slider(
        minimum=1, maximum=8, step=1, value=4, label="Interpolation Steps")
    fps_output = gr.Slider(
        minimum=1, maximum=120, step=1, value=24, label="FPS output")

    submit_btn = gr.Button("Submit")
    # Receives the base64 data URI returned by infer().
    video_output = gr.Textbox()

    submit_btn.click(
        fn=infer,
        inputs=[secret_token, video_input, interpolation, fps_output],
        outputs=video_output,
    )

demo.launch()