make predictor global and remove bf16
app.py CHANGED
@@ -72,6 +72,7 @@ examples = [
 OBJ_ID = 0
 sam2_checkpoint = "checkpoints/edgetam.pt"
 model_cfg = "edgetam.yaml"
+predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
 
 
 def get_video_fps(video_path):
@@ -226,7 +227,6 @@ def preprocess_video_in(
     input_points = []
     input_labels = []
 
-    predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
     inference_state = predictor.init_state(
         offload_video_to_cpu=True,
         offload_state_to_cpu=True,
@@ -255,7 +255,6 @@ def segment_with_points(
     inference_state,
     evt: gr.SelectData,
 ):
-    predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
     input_points.append(evt.index)
     print(f"TRACKING INPUT POINT: {input_points}")
 
@@ -337,12 +336,13 @@ def propagate_to_all(
     input_points,
     inference_state,
 ):
-    torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
-    predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cuda")
+    # torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
     if torch.cuda.get_device_properties(0).major >= 8:
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
+    predictor.to("cuda")
+
     if len(input_points) == 0 or video_in is None or inference_state is None:
         return None
     # run propagation throughout the video and collect the results in a dict
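
For context, a minimal sketch of the pattern this commit moves app.py toward: one module-level predictor built on CPU at import time, reused by the Gradio callbacks, and shifted to the GPU only when propagation runs. Only the lines shown in the diff above come from the actual file; the function signatures are abbreviated, and the import path and the video_path argument to init_state are assumptions based on the SAM 2 code base that EdgeTAM builds on.

import torch

# Import path assumed from the upstream SAM 2 package layout.
from sam2.build_sam import build_sam2_video_predictor

sam2_checkpoint = "checkpoints/edgetam.pt"
model_cfg = "edgetam.yaml"

# Built once at module load on CPU, instead of once per callback.
predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")


def preprocess_video_in(video_in):
    # Reuses the global predictor; the per-call build_sam2_video_predictor is gone.
    inference_state = predictor.init_state(
        offload_video_to_cpu=True,
        offload_state_to_cpu=True,
        video_path=video_in,  # assumed; the diff hunk cuts off before this argument
    )
    return inference_state


def propagate_to_all(video_in, input_points, inference_state):
    # bf16 autocast is removed; TF32 stays enabled on Ampere or newer GPUs.
    if torch.cuda.get_device_properties(0).major >= 8:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

    # The single global predictor is moved to the GPU only for propagation.
    predictor.to("cuda")

    if len(input_points) == 0 or video_in is None or inference_state is None:
        return None
    # run propagation throughout the video and collect the results in a dict
    ...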