remove check for precomputed_results
app.py
CHANGED
@@ -1017,11 +1017,6 @@ openai_en_classes = [
     "toilet paper",
 ]
 
-# language_names = json.load(open("data/language_mapping.json", encoding="utf-8"))
-# main_language_values = sorted(
-#     [[name, code] for code, name in language_names.items()], key=lambda x: x[0]
-# )
-# [[main_language_names[lang], lang] for lang in main_languages+sorted(l for l in main_language_names if l not in main_languages)]
 
 babel_imagenet = json.load(open("data/babel_imagenet-298.json", encoding="utf-8"))
 babelnet_images = json.load(open("data/images.json", encoding="utf-8"))

@@ -1029,8 +1024,7 @@ max_image_choices = 10 # Currently up to 30 images but relevance degrades quick
 no_image_idxs = [i for i, imgs in enumerate(babelnet_images) if len(imgs) == 0]
 IMG_HEIGHT, IMG_WIDTH = 512, 512
 
-
-# if os.path.exists("data/precomputed_results.json"):
+
 precomputed_results = json.load(open("data/precomputed_results.json"))
 
 request_header = {

@@ -1038,15 +1032,15 @@ request_header = {
 }
 ### Loading model; hard-coded to mSigLIP for now.
 
-if not precomputed_results:
-    open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
-    model, _, transform = open_clip.create_model_and_transforms(
-        open_clip_model, pretrained=open_clip_pretrained
-    )
-    tokenizer = open_clip.get_tokenizer(open_clip_model)
+# if not precomputed_results:
+#     open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
+#     model, _, transform = open_clip.create_model_and_transforms(
+#         open_clip_model, pretrained=open_clip_pretrained
+#     )
+#     tokenizer = open_clip.get_tokenizer(open_clip_model)
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model = model.to(device)
+#     device = "cuda" if torch.cuda.is_available() else "cpu"
+#     model = model.to(device)
 
 
 def change_language(randomize_imgs, randomize_labels):

@@ -1055,14 +1049,14 @@ def change_language(randomize_imgs, randomize_labels):
     class_order = list(range(len(labels)))
     np.random.shuffle(class_order)
     ### We use no prompt ensembling for now
-    if not precomputed_results:
-        text_tokens = tokenizer(labels).to(device)
-        with torch.no_grad():
-            text_features = model.encode_text(text_tokens).float()
-        text_features /= text_features.norm(dim=-1, keepdim=True)
-        text_features = text_features.cpu().numpy()
-    else:
-        text_features = None
+    # if not precomputed_results:
+    #     text_tokens = tokenizer(labels).to(device)
+    #     with torch.no_grad():
+    #         text_features = model.encode_text(text_tokens).float()
+    #     text_features /= text_features.norm(dim=-1, keepdim=True)
+    #     text_features = text_features.cpu().numpy()
+    # else:
+    text_features = None
     correct_text = gr.Text(
         f"Correct was: ''. Question 1/{len(babel_imagenet['EN'][0])} ", label="Game"
     )

@@ -1131,32 +1125,32 @@ def prepare(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        )  # precomputing script uses torch.topk which sorts in reverse here
+    #     similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    #     choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    )  # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]

@@ -1206,32 +1200,32 @@ def reroll(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        )  # precomputing script uses torch.topk which sorts in reverse here
+    #     similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    #     choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    )  # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]

@@ -1384,13 +1378,7 @@ with gr.Blocks(title="Babel-ImageNet Quiz") as demo:
         outputs=[options, image, class_idx, correct_choice, model_choice, choices],
     )
 
-
-    # demo.load(fn=change_language,
-    #     inputs=[language_select],
-    #     outputs=[text_embeddings, class_idx, correct_text, player_score_text, clip_score_text, player_score, clip_score]
-    # ).then(fn=prepare,
-    #     inputs=[class_idx, language_select, text_embeddings],
-    #     outputs=[options, image, class_idx, correct_choice, model_choice])
+
 
 
 demo.launch()
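Note on the retained code path: the inline comment "precomputing script uses torch.topk which sorts in reverse here" explains why app.py wraps the stored list in reversed() and then reads the model's pick from choices[-1]. Below is a minimal sketch of an offline precompute step consistent with that consumption; the function name and the exact JSON layout are assumptions for illustration, not the Space's actual script.

import torch

def precompute_choices(image_features: torch.Tensor,
                       text_features: torch.Tensor,
                       k: int = 4) -> list[list[int]]:
    # Hypothetical precompute sketch. Assumes L2-normalized mSigLIP
    # embeddings: image_features is (num_images, dim), text_features
    # is (num_classes, dim).
    similarity = image_features @ text_features.T  # cosine similarities
    # torch.topk returns indices sorted from highest to lowest similarity,
    # so entry 0 is the model's pick; reversed() in app.py moves it to the end.
    return torch.topk(similarity, k=k, dim=-1).indices.tolist()

Reversing a descending top-k list leaves the model's best guess at the end, matching model_choice_idx = choices[-1]; the guard "if idx not in choices: choices = [idx] + choices[1:]" then force-includes the correct class by overwriting the lowest-ranked entry rather than the model's pick.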