Real-ESRGAN_Pytorch

Running on Zero

App Files Files Community

Nick088 committed on Mar 28, 2024

Commit

f84f6c9

verified ·

1 Parent(s): 64daa3d

Update infer.py

Browse files

Files changed (1) hide show

infer.py +27 -54

infer.py CHANGED Viewed

@@ -1,89 +1,62 @@
 from PIL import Image
-import cv2
 import torch
 from RealESRGAN import RealESRGAN
 import tempfile
 import numpy as np
-from tqdm import tqdm
-import pydub
-from pydub import AudioSegment
-from moviepy.editor import VideoFileClip, AudioFileClip
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-def infer_image(img: Image.Image, size_modifier: int ) -> Image.Image:
-    if img is None:
-        raise Exception("Image not uploaded")
-    width, height = img.size
-    if width >= 5000 or height >= 5000:
-        raise Exception("The image is too large.")
-    model = RealESRGAN(device, scale=size_modifier)
-    model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
-    result = model.predict(img.convert('RGB'))
-    print(f"Image size ({device}): {size_modifier} ... OK")
-    return result
 def infer_video(video_filepath: str, size_modifier: int) -> str:
     model = RealESRGAN(device, scale=size_modifier)
     model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
-    # Extract audio from the original video file
-    audio = AudioSegment.from_file(video_filepath, format="mp4")
-    audio_array = np.array(audio.get_array_of_samples())
-    # Create a VideoCapture object for the video file
-    cap = cv2.VideoCapture(video_filepath)
-    # Create a temporary file for the output video
     tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     vid_output = tmpfile.name
     tmpfile.close()
-    # Create a VideoWriter object for the output video
-    vid_writer = cv2.VideoWriter(
         vid_output,
-        fourcc=cv2.VideoWriter.fourcc(*'mp4v'),
-        fps=cap.get(cv2.CAP_PROP_FPS),
-        frameSize=(int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) * size_modifier, int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) * size_modifier)
     )
-    # Process each frame of the video and write it to the output video
-    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    for i in tqdm.tqdm(range(n_frames)):
-        # Read the next frame
         ret, frame = cap.read()
         if not ret:
             break
-        # Convert the frame to RGB and feed it to the model
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         frame = Image.fromarray(frame)
-        upscaled_frame = model.predict(frame.convert('RGB'))
-        # Convert the upscaled frame back to BGR and write it to the output video
         upscaled_frame = np.array(upscaled_frame)
-        upscaled_frame = cv2.cvtColor(upscaled_frame, cv2.COLOR_RGB2BGR)
-        # Write the upscaled frame to the output video
         vid_writer.write(upscaled_frame)
-    # Release the VideoCapture and VideoWriter objects
-    cap.release()
     vid_writer.release()
-    # Create a new VideoFileClip object from the output video
-    output_clip = VideoFileClip(vid_output)
-    # Add the audio back to the output video
-    audio_clip = AudioFileClip(f"{video_filepath.split('.')[0]}.wav", fps=output_clip.fps)
-    output_clip = output_clip.set_audio(audio_clip)
-    # Save the output video to a new file
-    output_clip.write_videofile(f'output_{video_filepath}')
-    return f'output_{video_filepath}'

 from PIL import Image
+import cv2 as cv
 import torch
 from RealESRGAN import RealESRGAN
 import tempfile
 import numpy as np
+import tqdm
+import ffmpeg
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def infer_video(video_filepath: str, size_modifier: int) -> str:
     """Upscale every frame of a video with RealESRGAN and keep its audio.

     The frames are read with OpenCV, upscaled one by one, and written to an
     intermediate silent ``.mp4``.  That intermediate file is then re-encoded
     with libx264 and, when the input has an audio stream, muxed with the
     input's audio in a single ffmpeg invocation.

     Args:
         video_filepath: Path of the input video.
         size_modifier: Upscaling factor; selects the
             ``weights/RealESRGAN_x{size_modifier}.pth`` weights file and
             multiplies the output frame width and height.

     Returns:
         Path of the final upscaled video (a temporary-directory file whose
         name ends in ``_upscaled.mp4``).

     Raises:
         RuntimeError: If OpenCV cannot open ``video_filepath``.
     """
     import os  # local import: only needed for path handling and cleanup here

     model = RealESRGAN(device, scale=size_modifier)
     model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

     cap = cv.VideoCapture(video_filepath)
     if not cap.isOpened():
         cap.release()
         raise RuntimeError(f"Could not open video file: {video_filepath}")

     # Reserve a unique name for the silent intermediate video.
     tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     vid_output = tmpfile.name
     tmpfile.close()

     # The final file lives next to the intermediate one.  Deriving it with
     # splitext (instead of str.replace on the *input* path) guarantees it can
     # never collide with the input, whatever the input's extension is.
     final_output = os.path.splitext(vid_output)[0] + "_upscaled.mp4"

     try:
         vid_writer = None
         try:
             vid_writer = cv.VideoWriter(
                 vid_output,
                 fourcc=cv.VideoWriter.fourcc(*'mp4v'),
                 fps=cap.get(cv.CAP_PROP_FPS),
                 frameSize=(
                     int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) * size_modifier,
                     int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) * size_modifier,
                 ),
             )

             # The container's frame count is only used for the progress bar;
             # the loop itself runs until the capture reports no more frames,
             # so an inaccurate count cannot truncate the output.
             n_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
             with tqdm.tqdm(total=n_frames if n_frames > 0 else None) as progress:
                 while True:
                     ret, frame = cap.read()
                     if not ret:
                         break
                     # OpenCV delivers BGR; the model is fed an RGB PIL image.
                     frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
                     frame = Image.fromarray(frame)
                     upscaled_frame = model.predict(frame.convert('RGB'))
                     # Back to a BGR array for the OpenCV writer.
                     upscaled_frame = np.array(upscaled_frame)
                     upscaled_frame = cv.cvtColor(upscaled_frame, cv.COLOR_RGB2BGR)
                     vid_writer.write(upscaled_frame)
                     progress.update(1)
         finally:
             # Release both handles even if a frame fails; the writer must be
             # closed before ffmpeg reads the intermediate file.
             cap.release()
             if vid_writer is not None:
                 vid_writer.release()

         # Only request an audio mapping when the input actually has audio,
         # otherwise ffmpeg aborts on the missing stream.
         probe = ffmpeg.probe(video_filepath)
         has_audio = any(
             stream.get('codec_type') == 'audio' for stream in probe['streams']
         )

         upscaled_video = ffmpeg.input(vid_output).video
         if has_audio:
             # Mux video and audio in ONE output call; writing them with two
             # separate runs to the same path leaves only the second (audio).
             # NOTE(review): ac=1 (mono downmix) is carried over from the
             # previous audio-extraction step -- confirm it is intended.
             output = ffmpeg.output(
                 upscaled_video,
                 ffmpeg.input(video_filepath).audio,
                 final_output,
                 vcodec='libx264',
                 acodec='aac',
                 audio_bitrate='320k',
                 ac=1,
             )
         else:
             output = ffmpeg.output(upscaled_video, final_output, vcodec='libx264')
         output.run(overwrite_output=True)
     finally:
         # The silent intermediate file is no longer needed (or is incomplete
         # after a failure); do not leave it behind in the temp directory.
         if os.path.exists(vid_output):
             os.remove(vid_output)

     print(f"Video file : {video_filepath}")
     return final_output