Spaces:

Hyathi
/

SoundImage-LipSync

Paused

samarth-ht committed on Jan 30

Commit

7143bfc

1 Parent(s): 10ac76e

bug fixing

Files changed (3) hide show

scripts/inference.py CHANGED Viewed

@@ -86,6 +86,7 @@ def main(config, args):
         height=config.data.resolution,
         mask_path=args.mask_path,
     )
 if __name__ == "__main__":

         height=config.data.resolution,
         mask_path=args.mask_path,
     )
+    print("Inference completed successfully.", args.mask_path)
 if __name__ == "__main__":

soundimage/pipelines/lipsync_pipeline.py CHANGED Viewed

@@ -318,6 +318,7 @@ class LipsyncPipeline(DiffusionPipeline):
         # 0. Define call parameters
         batch_size = 1
         device = self._execution_device
         self.image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_path=mask_path)
         self.set_progress_bar_config(desc=f"Sample frames: {num_frames}")

         # 0. Define call parameters
         batch_size = 1
         device = self._execution_device
+        print(f"Loading fixed mask from {mask_path}")
         self.image_processor = ImageProcessor(height, mask=mask, device="cuda", mask_path=mask_path)
         self.set_progress_bar_config(desc=f"Sample frames: {num_frames}")

soundimage/utils/image_processor.py CHANGED Viewed

@@ -28,12 +28,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
 """
-def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
-    mask_image = cv2.imread(mask_path)
-    mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
-    mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
-    mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
-    return mask_image
 class ImageProcessor:
@@ -53,6 +48,7 @@ class ImageProcessor:
             self.restorer = AlignRestore()
             if mask_image is None:
                 self.mask_image = self.load_fixed_mask(resolution, mask_path)
             else:
                 self.mask_image = mask_image
@@ -66,8 +62,14 @@ class ImageProcessor:
                 # self.face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True)  # Process single image
                 self.face_mesh = None
                 self.fa = None
     def detect_facial_landmarks(self, image: np.ndarray):
         height, width, _ = image.shape

 """
 class ImageProcessor:
             self.restorer = AlignRestore()
             if mask_image is None:
+                print(f"Loading fixed mask from {mask_path}")
                 self.mask_image = self.load_fixed_mask(resolution, mask_path)
             else:
                 self.mask_image = mask_image
                 # self.face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True)  # Process single image
                 self.face_mesh = None
                 self.fa = None
+    def load_fixed_mask(resolution: int, mask_path: str) -> torch.Tensor:
+        print(f"Loading fixed mask from {mask_path}")
+        mask_image = cv2.imread(mask_path)
+        mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
+        mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
+        mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
+        return mask_image
     def detect_facial_landmarks(self, image: np.ndarray):
         height, width, _ = image.shape