Real-ESRGAN_Pytorch

Running on Zero

App Files Files Community

Nick088 committed on Mar 28, 2024

Commit

a734e0b

verified ·

1 Parent(s): 0fa50f3

Update infer.py

Browse files

Files changed (1) hide show

infer.py +32 -17

infer.py CHANGED Viewed

@@ -25,46 +25,61 @@ def infer_image(img: Image.Image, size_modifier: int ) -> Image.Image:
     return result
 def infer_video(video_filepath: str, size_modifier: int) -> str:
     # Upscale each frame of the video at video_filepath with RealESRGAN and
     # write the result to a temporary .mp4 file, returning that file's path.
     # size_modifier selects the model scale and the weights file, and
     # multiplies the output frame width and height.
     model = RealESRGAN(device, scale=size_modifier)
     model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
     cap = cv.VideoCapture(video_filepath)
     # delete=False keeps the file on disk after close(), so the writer below
     # can reopen it by name.
     tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     vid_output = tmpfile.name
     tmpfile.close()
     # Output keeps the source frame rate; frame size is the source size
     # scaled by size_modifier.
-    vid_writer = cv.VideoWriter(
         vid_output,
-        fourcc=cv.VideoWriter.fourcc(*'mp4v'),
-        fps=cap.get(cv.CAP_PROP_FPS),
-        frameSize=(int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) * size_modifier, int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) * size_modifier)
     )
     # Loop over the frame count reported by the capture; the ret check
     # below still stops early if a read fails.
-    n_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
-    # while cap.isOpened():
-    for _ in tqdm.tqdm(range(n_frames)):
         ret, frame = cap.read()
         if not ret:
             break
         # Convert BGR -> RGB before wrapping the frame as a PIL image for
         # the model.
-        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
         frame = Image.fromarray(frame)
         upscaled_frame = model.predict(frame.convert('RGB'))
         # Back to a NumPy array in BGR order for the video writer.
-        upscaled_frame = np.array(upscaled_frame)
-        upscaled_frame = cv.cvtColor(upscaled_frame, cv.COLOR_RGB2BGR)
         # Debug output: prints the upscaled frame's array shape every frame.
-        print(upscaled_frame.shape)
         vid_writer.write(upscaled_frame)
     # NOTE(review): only the writer is released; cap is never released in
     # this version.
     vid_writer.release()
-    print(f"Video file : {video_filepath}")
-    return vid_output

     return result
 def infer_video(video_filepath: str, size_modifier: int) -> str:
     """Upscale every frame of a video with RealESRGAN, keeping its audio.

     The frames are upscaled one by one and written to a temporary silent
     ``.mp4``. If the source video has an audio track, that track is muxed
     onto the upscaled video in a second temporary file.

     Args:
         video_filepath: Path of the video to upscale.
         size_modifier: Upscaling factor; selects the model scale and the
             ``weights/RealESRGAN_x{size_modifier}.pth`` weights file, and
             multiplies the output frame width and height.

     Returns:
         Path of the upscaled ``.mp4`` file (with audio when the source
         had any, otherwise the silent upscaled video).
     """
     import os  # local import: only needed to clean up the intermediate file

     model = RealESRGAN(device, scale=size_modifier)
     model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)

     # delete=False keeps the file on disk after close(), so the writer can
     # reopen it by name.
     tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     vid_output = tmpfile.name
     tmpfile.close()

     # Use the `cv` alias and `tqdm.tqdm`, as the unchanged parts of this
     # function already do; OpenCV has no AudioCapture, so audio is handled
     # after the frames are written.
     cap = cv.VideoCapture(video_filepath)
     vid_writer = None
     try:
         vid_writer = cv.VideoWriter(
             vid_output,
             fourcc=cv.VideoWriter.fourcc(*'mp4v'),
             fps=cap.get(cv.CAP_PROP_FPS),
             frameSize=(
                 int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) * size_modifier,
                 int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) * size_modifier,
             ),
         )
         n_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
         for _ in tqdm.tqdm(range(n_frames)):
             ret, frame = cap.read()
             if not ret:
                 # The reported frame count can exceed the readable frames.
                 break
             # BGR -> RGB for the model, then back to BGR for the writer.
             frame = Image.fromarray(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
             upscaled_frame = np.array(model.predict(frame.convert('RGB')))
             vid_writer.write(cv.cvtColor(upscaled_frame, cv.COLOR_RGB2BGR))
     finally:
         # Release both handles even if upscaling fails part-way.
         cap.release()
         if vid_writer is not None:
             vid_writer.release()

     # NOTE(review): `mpy` is expected to be moviepy's editor module bound at
     # module level -- confirm the import exists in this file.
     source_clip = mpy.VideoFileClip(video_filepath)
     try:
         if source_clip.audio is None:
             # Nothing to mux: the silent upscaled video is the result.
             return vid_output

         # Write to a fresh temp file: prefixing the input path with
         # 'output_' breaks as soon as that path contains a directory.
         final_tmp = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
         final_output = final_tmp.name
         final_tmp.close()

         output_clip = mpy.VideoFileClip(vid_output)
         try:
             # Reuse the source's own audio clip so its sampling rate is
             # kept (the video frame rate is not an audio sampling rate).
             output_clip.set_audio(source_clip.audio).write_videofile(final_output)
         finally:
             output_clip.close()
     finally:
         source_clip.close()

     # The silent intermediate is no longer needed once the muxed file exists.
     os.remove(vid_output)
     return final_output