Commit 620ddd7
x-lai committed
Parent(s): 3dd44d9
Release training script
Former-commit-id: 4fc97979a3cbc5e07342bc87370a566bbf0d9855
- utils/reason_seg_dataset.py +9 -32
- utils/refer_seg_dataset.py +0 -38
utils/reason_seg_dataset.py CHANGED

@@ -59,10 +59,9 @@ class ReasonSegDataset(torch.utils.data.Dataset):
         self.explanatory_question_list = EXPLANATORY_QUESTION_LIST
 
         if explanatory != -1:
-            self.
+            self.img_to_explanation = {}
             for sub_data in [
-                "
-                "20230711_2000_0_processed_masked_partial_masked.json",
+                "train.json",
             ]:
                 with open(
                     os.path.join(base_image_dir, "reason_seg", "explanatory", sub_data)
@@ -70,7 +69,7 @@ class ReasonSegDataset(torch.utils.data.Dataset):
                     items = json.load(f)
                 for item in items:
                     img_name = item["image_path"].split("/")[-1]
-                    self.
+                    self.img_to_explanation[img_name] = {
                         "query": item["query"],
                         "outputs": item["outputs"],
                     }
@@ -136,8 +135,8 @@ class ReasonSegDataset(torch.utils.data.Dataset):
 
         image_name = image_path.split("/")[-1]
         if (
-            self.explanatory != -1 and image_name in self.
-        ):
+            self.explanatory != -1 and image_name in self.img_to_explanation
+        ):
             if random.random() < self.explanatory:
                 choice = 2
             else:
@@ -145,7 +144,6 @@ class ReasonSegDataset(torch.utils.data.Dataset):
 
         questions = []
         answers = []
-        class_ids = []
        for text in sampled_sents:
             if is_sentence:
                 question_template = random.choice(self.long_question_list)
@@ -155,13 +153,13 @@ class ReasonSegDataset(torch.utils.data.Dataset):
                 questions.append(question_template.format(class_name=text.lower()))
 
             img_name = image_path.split("/")[-1]
-            if self.explanatory != -1 and img_name in self.
+            if self.explanatory != -1 and img_name in self.img_to_explanation:
                 # choice = random.randint(0, 2)
                 if choice == 0:  # [SEG] token
                     answers.append(random.choice(self.answer_list))
                 elif choice == 1:  # [SEG] token + text answer
                     image_name = image_path.split("/")[-1]
-                    answer = self.
+                    answer = self.img_to_explanation[image_name]["outputs"]
                     answer = random.choice(self.answer_list) + " {}".format(answer)
                     questions[-1] = (
                         DEFAULT_IMAGE_TOKEN
@@ -172,7 +170,7 @@ class ReasonSegDataset(torch.utils.data.Dataset):
                     answers.append(answer)
                 elif choice == 2:  # vanilla text answer
                     image_name = image_path.split("/")[-1]
-                    answer = self.
+                    answer = self.img_to_explanation[image_name]["outputs"]
                     questions[-1] = DEFAULT_IMAGE_TOKEN + " " + text
                     answers.append(answer)
                 else:
@@ -192,7 +190,6 @@ class ReasonSegDataset(torch.utils.data.Dataset):
             conversations.append(conv.get_prompt())
             i += 1
 
-        # ==============================
         # replace <image> token
         for i in range(len(conversations)):
             replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len
@@ -202,38 +199,18 @@ class ReasonSegDataset(torch.utils.data.Dataset):
             conversations[i] = conversations[i].replace(
                 DEFAULT_IMAGE_TOKEN, replace_token
             )
-        # ==============================
 
         images = self.preprocess(torch.from_numpy(images).permute(2, 0, 1).contiguous())
 
         image_name = image_path.split("/")[-1]
-        if self.explanatory != -1 and image_name in self.
-            # print("e1")
-
+        if self.explanatory != -1 and image_name in self.img_to_explanation and choice == 2:
             masks = torch.rand(0, *ori_size)
             label = torch.ones(ori_size) * self.ignore_label
         else:
-            # print("e2")
-
             masks = np.stack(sampled_masks, axis=0)
             masks = torch.from_numpy(masks)
             label = torch.ones(masks.shape[1], masks.shape[2]) * self.ignore_label
 
-        # print("reason_seg: {}".format(conversations))
-
-        # # debug
-        # if masks.shape[0] != 0:
-        #     save_dir = "./debug/{}".format(image_path.split("/")[-1].split(".")[0])
-        #     os.makedirs(save_dir, exist_ok=True)
-        #     print("masks.shape: ", masks.shape)
-        #     for i in range(masks.shape[0]):
-        #         cv2.imwrite("{}/mask_{}.jpg".format(save_dir, i), masks[i].numpy().astype(np.uint8)*100)
-        #     assert len(conversations) == masks.shape[0]
-        #     with open("{}/conversations.txt".format(save_dir), "w+") as f:
-        #         for i in range(len(conversations)):
-        #             f.write("{}. ".format(i) + conversations[i] + "\n")
-        #     shutil.copy(image_path, save_dir)
-
         return (
             image_path,
             images,
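For orientation, here is a minimal, self-contained sketch of the explanatory-answer sampling that this commit points at the released train.json. It mirrors the names visible in the diff (img_to_explanation, answer_list, explanatory); the helper functions themselves and the fallback taken when the text-answer branch is not chosen (the hunk cuts off at else:) are assumptions for illustration, not code from the repository.

import json
import os
import random

def load_explanations(base_image_dir):
    # Build img_to_explanation from reason_seg/explanatory/train.json,
    # keyed by image filename, as the added lines above do.
    img_to_explanation = {}
    path = os.path.join(base_image_dir, "reason_seg", "explanatory", "train.json")
    with open(path) as f:
        items = json.load(f)
    for item in items:
        img_name = item["image_path"].split("/")[-1]
        img_to_explanation[img_name] = {
            "query": item["query"],
            "outputs": item["outputs"],
        }
    return img_to_explanation

def pick_answer(img_name, img_to_explanation, answer_list, explanatory=0.1):
    # Reproduce the three-way choice in __getitem__: [SEG]-only (0),
    # [SEG] + explanation (1), or text-only explanation (2).
    if explanatory == -1 or img_name not in img_to_explanation:
        return random.choice(answer_list)  # no explanation available
    if random.random() < explanatory:
        choice = 2
    else:
        choice = random.choice([0, 1])  # assumption: the diff truncates this branch
    outputs = img_to_explanation[img_name]["outputs"]
    if choice == 0:
        return random.choice(answer_list)                  # [SEG] token only
    if choice == 1:
        return random.choice(answer_list) + " " + outputs  # [SEG] token + text answer
    return outputs                                         # vanilla text answer

Note that when choice == 2 the diff also swaps the ground-truth masks for an empty torch.rand(0, *ori_size) tensor, so text-only samples contribute no segmentation target.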
utils/refer_seg_dataset.py CHANGED

@@ -63,7 +63,6 @@ class ReferSegDataset(torch.utils.data.Dataset):
             ref_ids_train = refer_api.getRefIds(split="train")
             images_ids_train = refer_api.getImgIds(ref_ids=ref_ids_train)
             refs_train = refer_api.loadRefs(ref_ids=ref_ids_train)
-            ref_file = os.path.join(DATA_DIR, ds, "refs(" + splitBy + ").p")
 
             refer_seg_ds = {}
             refer_seg_ds["images"] = []
@@ -149,7 +148,6 @@ class ReferSegDataset(torch.utils.data.Dataset):
         sampled_classes = sampled_sents
         img = cv2.imread(image_path)
         images = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        ori_size = images.shape[:2]
 
         # preprocess images for clip
         images_clip = self.clip_image_processor.preprocess(images, return_tensors="pt")[
@@ -163,7 +161,6 @@ class ReferSegDataset(torch.utils.data.Dataset):
 
         questions = []
         answers = []
-        class_ids = []
         for text in sampled_classes:
             text = text.strip()
             assert len(text.split("||")) == 1
@@ -183,7 +180,6 @@ class ReferSegDataset(torch.utils.data.Dataset):
             conversations.append(conv.get_prompt())
             i += 1
 
-        # ==============================
         # replace <image> token
         for i in range(len(conversations)):
             replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len
@@ -193,7 +189,6 @@ class ReferSegDataset(torch.utils.data.Dataset):
             conversations[i] = conversations[i].replace(
                 DEFAULT_IMAGE_TOKEN, replace_token
             )
-        # ==============================
 
         images = self.preprocess(torch.from_numpy(images).permute(2, 0, 1).contiguous())
 
@@ -223,42 +218,9 @@ class ReferSegDataset(torch.utils.data.Dataset):
             masks.append(m)
 
         masks = np.stack(masks, axis=0)
-
-        # debug
-        # print("masks.shape: ", masks.shape)
-        # for i in range(masks.shape[0]):
-        #     cv2.imwrite("debug/{}_mask_{}.png".format(image_path.split("refer_seg/images")[-1].replace("/", "-").split(".")[0], sampled_sents[i]), masks[i]*100)
-
-        # debug
-        # if ds.endswith("masked"):
-        #     save_dir = "./debug/{}".format(image_path.split("/")[-1].split(".")[0])
-        #     os.makedirs(save_dir, exist_ok=True)
-        #     print("masks.shape: ", masks.shape)
-        #     for i in range(masks.shape[0]):
-        #         cv2.imwrite("{}/mask_{}.jpg".format(save_dir, i), masks[i]*100)
-        #     assert len(conversations) == masks.shape[0]
-        #     with open("{}/conversations.txt".format(save_dir), "w+") as f:
-        #         for i in range(len(conversations)):
-        #             f.write("{}. ".format(i) + conversations[i] + "\n")
-        #     shutil.copy(image_path, save_dir)
-
         masks = torch.from_numpy(masks)
         label = torch.ones(masks.shape[1], masks.shape[2]) * self.ignore_label
 
-        # print("refer_seg: {}".format(conversations))
-
-        # # debug
-        # save_dir = "./debug/{}".format(image_path.split("/")[-1].split(".")[0])
-        # os.makedirs(save_dir, exist_ok=True)
-        # print("masks.shape: ", masks.shape)
-        # for i in range(masks.shape[0]):
-        #     cv2.imwrite("{}/mask_{}_{}.jpg".format(save_dir, i, sampled_classes[i]), masks[i].numpy().astype(np.uint8)*100)
-        # assert len(conversations) == masks.shape[0]
-        # with open("{}/conversations.txt".format(save_dir), "w+") as f:
-        #     for i in range(len(conversations)):
-        #         f.write("{}. ".format(i) + conversations[i] + "\n")
-        # shutil.copy(image_path, save_dir)
-
         return (
             image_path,
             images,
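As a companion sketch, here are the two refer_seg_dataset.py steps that survive the cleanup: expanding the <image> placeholder into patch tokens and stacking per-sentence masks next to an all-ignore label map. The token string values and the ignore value of 255 are assumptions for illustration; the repository defines DEFAULT_IMAGE_TOKEN, DEFAULT_IMAGE_PATCH_TOKEN, image_token_len, and self.ignore_label elsewhere.

import numpy as np
import torch

DEFAULT_IMAGE_TOKEN = "<image>"           # placeholder value (assumption)
DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"  # placeholder value (assumption)

def expand_image_token(conversation, image_token_len):
    # Replace the single <image> marker with image_token_len patch tokens,
    # as the "replace <image> token" loop in both datasets does.
    replace_token = DEFAULT_IMAGE_PATCH_TOKEN * image_token_len
    return conversation.replace(DEFAULT_IMAGE_TOKEN, replace_token)

def build_targets(binary_masks, ignore_label=255):
    # Stack per-sentence binary masks into (num_sents, H, W) and create a
    # label map filled with the ignore value, mirroring the diff context.
    masks = np.stack(binary_masks, axis=0)
    masks = torch.from_numpy(masks)
    label = torch.ones(masks.shape[1], masks.shape[2]) * ignore_label
    return masks, label

# quick check on dummy data
masks, label = build_targets([np.zeros((4, 4), dtype=np.uint8)])
print(masks.shape, label.shape)  # torch.Size([1, 4, 4]) torch.Size([4, 4])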