Spaces:
Sleeping
Sleeping
Code cleanup
Browse files
app.py
CHANGED
|
@@ -1030,17 +1030,6 @@ precomputed_results = json.load(open("data/precomputed_results.json"))
|
|
| 1030 |
request_header = {
|
| 1031 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
|
| 1032 |
}
|
| 1033 |
-
### Loading model; hard-coded to mSigLIP for now.
|
| 1034 |
-
|
| 1035 |
-
# if not precomputed_results:
|
| 1036 |
-
# open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
|
| 1037 |
-
# model, _, transform = open_clip.create_model_and_transforms(
|
| 1038 |
-
# open_clip_model, pretrained=open_clip_pretrained
|
| 1039 |
-
# )
|
| 1040 |
-
# tokenizer = open_clip.get_tokenizer(open_clip_model)
|
| 1041 |
-
|
| 1042 |
-
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 1043 |
-
# model = model.to(device)
|
| 1044 |
|
| 1045 |
|
| 1046 |
def change_language(randomize_imgs, randomize_labels):
|
|
@@ -1048,14 +1037,6 @@ def change_language(randomize_imgs, randomize_labels):
|
|
| 1048 |
labels = babel_imagenet["EN"][1]
|
| 1049 |
class_order = list(range(len(labels)))
|
| 1050 |
np.random.shuffle(class_order)
|
| 1051 |
-
### We use no prompt ensembling for now
|
| 1052 |
-
# if not precomputed_results:
|
| 1053 |
-
# text_tokens = tokenizer(labels).to(device)
|
| 1054 |
-
# with torch.no_grad():
|
| 1055 |
-
# text_features = model.encode_text(text_tokens).float()
|
| 1056 |
-
# text_features /= text_features.norm(dim=-1, keepdim=True)
|
| 1057 |
-
# text_features = text_features.cpu().numpy()
|
| 1058 |
-
# else:
|
| 1059 |
text_features = None
|
| 1060 |
correct_text = gr.Text(
|
| 1061 |
f"Correct was: ''. Question 1/{len(babel_imagenet['EN'][0])} ", label="Game"
|
|
@@ -1119,35 +1100,10 @@ def prepare(raw_idx, text_embeddings, class_order):
|
|
| 1119 |
class_idx = lang_class_idxs[idx]
|
| 1120 |
|
| 1121 |
img_idx = 0
|
| 1122 |
-
img_idx = np.random.choice(
|
| 1123 |
-
min(len(babelnet_images[class_idx]), max_image_choices)
|
| 1124 |
-
)
|
| 1125 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
| 1126 |
class_labels = openai_en_classes
|
| 1127 |
|
| 1128 |
-
# if not precomputed_results:
|
| 1129 |
-
# try:
|
| 1130 |
-
# image_input = (
|
| 1131 |
-
# transform(
|
| 1132 |
-
# Image.open(
|
| 1133 |
-
# requests.get(img_url, stream=True, headers=request_header).raw
|
| 1134 |
-
# ).convert("RGB")
|
| 1135 |
-
# )
|
| 1136 |
-
# .unsqueeze(0)
|
| 1137 |
-
# .to(device)
|
| 1138 |
-
# )
|
| 1139 |
-
# with torch.no_grad():
|
| 1140 |
-
# image_features = model.encode_image(image_input).float()
|
| 1141 |
-
# image_features /= image_features.norm(dim=-1, keepdim=True)
|
| 1142 |
-
# except:
|
| 1143 |
-
# gr.Warning("There is a problem with the next class. Skipping it.")
|
| 1144 |
-
# return prepare(
|
| 1145 |
-
# raw_idx, text_embeddings, class_order
|
| 1146 |
-
# )
|
| 1147 |
-
|
| 1148 |
-
# similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
|
| 1149 |
-
# choices = np.argsort(similarity)[-4:].tolist()
|
| 1150 |
-
# else:
|
| 1151 |
choices = list(
|
| 1152 |
reversed(precomputed_results["EN"][idx][img_idx])
|
| 1153 |
) # the precomputing script uses torch.topk, which returns results in descending order, so they are reversed here
|
|
@@ -1194,35 +1150,10 @@ def reroll(raw_idx, text_embeddings, class_order):
|
|
| 1194 |
class_idx = lang_class_idxs[idx]
|
| 1195 |
|
| 1196 |
img_idx = 0
|
| 1197 |
-
img_idx = np.random.choice(
|
| 1198 |
-
min(len(babelnet_images[class_idx]), max_image_choices)
|
| 1199 |
-
)
|
| 1200 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
| 1201 |
class_labels = openai_en_classes
|
| 1202 |
|
| 1203 |
-
# if not precomputed_results:
|
| 1204 |
-
# try:
|
| 1205 |
-
# image_input = (
|
| 1206 |
-
# transform(
|
| 1207 |
-
# Image.open(
|
| 1208 |
-
# requests.get(img_url, stream=True, headers=request_header).raw
|
| 1209 |
-
# ).convert("RGB")
|
| 1210 |
-
# )
|
| 1211 |
-
# .unsqueeze(0)
|
| 1212 |
-
# .to(device)
|
| 1213 |
-
# )
|
| 1214 |
-
# with torch.no_grad():
|
| 1215 |
-
# image_features = model.encode_image(image_input).float()
|
| 1216 |
-
# image_features /= image_features.norm(dim=-1, keepdim=True)
|
| 1217 |
-
# except:
|
| 1218 |
-
# gr.Warning("There is a problem with the next class. Skipping it.")
|
| 1219 |
-
# return prepare(
|
| 1220 |
-
# raw_idx, text_embeddings, class_order
|
| 1221 |
-
# )
|
| 1222 |
-
|
| 1223 |
-
# similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
|
| 1224 |
-
# choices = np.argsort(similarity)[-4:].tolist()
|
| 1225 |
-
# else:
|
| 1226 |
choices = list(
|
| 1227 |
reversed(precomputed_results["EN"][idx][img_idx])
|
| 1228 |
) # the precomputing script uses torch.topk, which returns results in descending order, so they are reversed here
|
|
@@ -1283,19 +1214,7 @@ with gr.Blocks(title="Babel-ImageNet Quiz") as demo:
|
|
| 1283 |
)
|
| 1284 |
|
| 1285 |
with gr.Row():
|
| 1286 |
-
|
| 1287 |
-
# choices=main_language_values,
|
| 1288 |
-
# value="EN",
|
| 1289 |
-
# interactive=True,
|
| 1290 |
-
# label="Select your language:",
|
| 1291 |
-
# )
|
| 1292 |
-
# randomize_classes = gr.Checkbox(
|
| 1293 |
-
# label="Randomize class order (or play in canonic order)", value=True
|
| 1294 |
-
# )
|
| 1295 |
-
# randomize_images = gr.Checkbox(
|
| 1296 |
-
# label="Randomize images (if unchecked, will always show the same image). Other images might be less relevant.",
|
| 1297 |
-
# value=True,
|
| 1298 |
-
# )
|
| 1299 |
start_btn = gr.Button(value="Start", variant="primary")
|
| 1300 |
|
| 1301 |
# quiz area
|
|
@@ -1378,7 +1297,4 @@ with gr.Blocks(title="Babel-ImageNet Quiz") as demo:
|
|
| 1378 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices],
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
-
|
| 1382 |
-
|
| 1383 |
-
|
| 1384 |
-
demo.launch()
|
|
|
|
| 1030 |
request_header = {
|
| 1031 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
|
| 1032 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
|
| 1034 |
|
| 1035 |
def change_language(randomize_imgs, randomize_labels):
|
|
|
|
| 1037 |
labels = babel_imagenet["EN"][1]
|
| 1038 |
class_order = list(range(len(labels)))
|
| 1039 |
np.random.shuffle(class_order)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1040 |
text_features = None
|
| 1041 |
correct_text = gr.Text(
|
| 1042 |
f"Correct was: ''. Question 1/{len(babel_imagenet['EN'][0])} ", label="Game"
|
|
|
|
| 1100 |
class_idx = lang_class_idxs[idx]
|
| 1101 |
|
| 1102 |
img_idx = 0
|
| 1103 |
+
img_idx = np.random.choice(min(len(babelnet_images[class_idx]), max_image_choices))
|
|
|
|
|
|
|
| 1104 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
| 1105 |
class_labels = openai_en_classes
|
| 1106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1107 |
choices = list(
|
| 1108 |
reversed(precomputed_results["EN"][idx][img_idx])
|
| 1109 |
) # the precomputing script uses torch.topk, which returns results in descending order, so they are reversed here
|
|
|
|
| 1150 |
class_idx = lang_class_idxs[idx]
|
| 1151 |
|
| 1152 |
img_idx = 0
|
| 1153 |
+
img_idx = np.random.choice(min(len(babelnet_images[class_idx]), max_image_choices))
|
|
|
|
|
|
|
| 1154 |
img_url = babelnet_images[class_idx][img_idx]["url"]
|
| 1155 |
class_labels = openai_en_classes
|
| 1156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1157 |
choices = list(
|
| 1158 |
reversed(precomputed_results["EN"][idx][img_idx])
|
| 1159 |
) # the precomputing script uses torch.topk, which returns results in descending order, so they are reversed here
|
|
|
|
| 1214 |
)
|
| 1215 |
|
| 1216 |
with gr.Row():
|
| 1217 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1218 |
start_btn = gr.Button(value="Start", variant="primary")
|
| 1219 |
|
| 1220 |
# quiz area
|
|
|
|
| 1297 |
outputs=[options, image, class_idx, correct_choice, model_choice, choices],
|
| 1298 |
)
|
| 1299 |
|
| 1300 |
+
demo.launch()
|
|
|
|
|
|
|
|
|