Update app.py (#5)
Commit: 33d6b7080ab05bb5206826ee4f247e830e79bdf7
Co-authored-by: Yifei Shen <yshenaw@users.noreply.huggingface.co>
app.py CHANGED

@@ -83,7 +83,7 @@ def generate_activations(image):
         result.scatter_(-1, topk.indices, topk.values)
         cached_list.append(result.detach().cpu())
         topk_indices = (
-            latents.squeeze(0).mean(dim=0).topk(k=
+            latents.squeeze(0).mean(dim=0).topk(k=2000).indices.detach().cpu()
         )
 
     handles = [hooked_module.register_forward_hook(hook)]
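For context, here is a self-contained sketch of the hook pattern this hunk edits: a forward hook re-sparsifies SAE latents with `scatter_`, caches them, and records the indices of the features most active on average across tokens. The names `hook`, `cached_list`, `topk_indices`, and `handles` come from the diff; `toy_sae_encode`, the tensor shapes, and `k=8` are illustrative stand-ins, not the Space's real code (the commit sets the real `k` to 2000).

```python
# Hypothetical sketch of the forward-hook/top-k caching pattern in the diff.
import torch
import torch.nn as nn

hooked_module = nn.Linear(16, 16)   # stand-in for the LMM layer being probed
cached_list: list[torch.Tensor] = []
topk_indices = None

def toy_sae_encode(x: torch.Tensor) -> torch.Tensor:
    # Stand-in for the real SAE encoder: project to a wider feature space.
    return torch.relu(x @ torch.randn(x.shape[-1], 64))

def hook(module, inputs, output):
    global topk_indices
    latents = toy_sae_encode(output)           # (batch, tokens, n_features)
    topk = latents.topk(k=8, dim=-1)           # keep only the strongest features
    result = torch.zeros_like(latents)
    result.scatter_(-1, topk.indices, topk.values)   # re-sparsified latents
    cached_list.append(result.detach().cpu())
    # The commit changes this k (2000 in the Space); 8 fits the toy sizes here.
    topk_indices = (
        latents.squeeze(0).mean(dim=0).topk(k=8).indices.detach().cpu()
    )

handles = [hooked_module.register_forward_hook(hook)]
hooked_module(torch.randn(1, 5, 16))           # any forward pass fires the hook
for h in handles:
    h.remove()                                 # detach hooks when done
```

With `k=2000`, the demo surfaces the 2000 most active SAE features for the uploaded image from a single forward pass.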
@@ -209,7 +209,7 @@ with gr.Blocks() as demo:
         """
         # Large Multi-modal Models Can Interpret Features in Large Multi-modal Models
 
-        [ArXiv Paper](https://arxiv.org/abs/2411.14982) | [LMMs-Lab Homepage](https://lmms-lab.framer.ai) | 🤗 [Huggingface Collections](https://huggingface.co/collections/lmms-lab/llava-sae-674026e4e7bc8c29c70bc3a3) | [GitHub Repo](https://github.com/EvolvingLMMs-Lab/multimodal-sae)
+        [A Database for Interpreted 5K Features](https://huggingface.co/datasets/lmms-lab/llava-sae-explanations-5k) | [ArXiv Paper](https://arxiv.org/abs/2411.14982) | [LMMs-Lab Homepage](https://lmms-lab.framer.ai) | 🤗 [Huggingface Collections](https://huggingface.co/collections/lmms-lab/llava-sae-674026e4e7bc8c29c70bc3a3) | [GitHub Repo](https://github.com/EvolvingLMMs-Lab/multimodal-sae)
         """
     )
     with gr.Accordion("ℹ️ Instructions", open=False):
@@ -234,13 +234,17 @@ with gr.Blocks() as demo:
 
         dummy_text = gr.Textbox(visible=False, label="Explanation")
         gr.Examples(
-            [
+            [ ["assets/eyes.png", 2274, "Eyes"],
                 ["assets/sunglasses.jpg", 10, "Sunglasses"],
-                ["assets/greedy.jpg", 14, "Greedy eating"],
-                ["assets/railway.jpg", 28, "Railway tracks"],
                 ["assets/bird.png", 1803, "The seagull feathers."],
-
+
+                ["assets/railway.jpg", 28, "Railway tracks"],
+                ["assets/sunglasses.jpg", 1085, "Human Faces"],
                 ["assets/monkey.png", 2692, "Monkey"],
+
+                ["assets/greedy.jpg", 14, "Greedy eating"],
+                ["assets/happy.jpg", 19379, "Happy"],
+                ["assets/sad.jpg", 108692, "Sad"],
             ],
             inputs=[image, feature_num, dummy_text],
             label="Examples",
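For readers unfamiliar with `gr.Examples`, here is a minimal runnable sketch of how these rows drive the demo. The component names (`image`, `feature_num`, `dummy_text`) and the two example rows are taken from the diff; the component types and the bare layout are assumptions.

```python
# Hedged sketch: how [image_path, feature_id, explanation] rows map onto inputs.
import gradio as gr

with gr.Blocks() as demo:
    image = gr.Image(type="filepath", label="Input Image")
    feature_num = gr.Number(label="Feature Number", precision=0)
    dummy_text = gr.Textbox(visible=False, label="Explanation")
    gr.Examples(
        [
            # Each row fills one value per component in `inputs`, in order:
            # image path -> image, SAE feature id -> feature_num,
            # human-written explanation -> (hidden) dummy_text.
            ["assets/eyes.png", 2274, "Eyes"],
            ["assets/sunglasses.jpg", 10, "Sunglasses"],
        ],
        inputs=[image, feature_num, dummy_text],
        label="Examples",
    )

if __name__ == "__main__":
    demo.launch()
```

Clicking a row fills each listed component in order, which is why every example row carries exactly three entries.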