Add steering models

- app.py +98 -3
- assets/happy.jpg +0 -0
- requirements.txt +3 -1
app.py CHANGED

@@ -24,6 +24,7 @@ topk_indices = None
 sunglasses_file_path = "assets/sunglasses.jpg"
 greedy_file_path = "assets/greedy.jpg"
 railway_file_path = "assets/railway.jpg"
+happy_file_path = "assets/happy.jpg"


 def generate_activations(image):
@@ -69,7 +70,6 @@ def generate_activations(image):
     for handle in handles:
         handle.remove()

-    print(cached_tensor.shape)
     torch.cuda.empty_cache()
     return topk_indices

@@ -96,6 +96,77 @@ def visualize_activations(image, feature_num):

     return activation_images

+def clamp_features_max(
+    sae: Sae, feature: int, hooked_module: torch.nn.Module, k: float = 10
+):
+    def hook(module: torch.nn.Module, _, outputs):
+        # Maybe unpack tuple outputs
+        if isinstance(outputs, tuple):
+            unpack_outputs = list(outputs)
+        else:
+            unpack_outputs = list(outputs)
+        latents = sae.pre_acts(unpack_outputs[0])
+        # Only clamp the feature for the first forward
+        if latents.shape[1] != 1:
+            latents[:, :, feature] = k
+        top_acts, top_indices = sae.select_topk(latents)
+        sae_out = sae.decode(top_acts[0], top_indices[0]).unsqueeze(0).to(torch.float16)
+        unpack_outputs[0] = sae_out
+        if isinstance(outputs, tuple):
+            outputs = tuple(unpack_outputs)
+        else:
+            outputs = unpack_outputs[0]
+        return outputs
+
+    handles = [hooked_module.register_forward_hook(hook)]
+
+    return handles
+
+def generate_with_clamp(feature_idx, feature_strength, text, image, chat_history):
+    if not isinstance(feature_idx, int):
+        feature_idx = int(feature_idx)
+    if not isinstance(feature_strength, float):
+        feature_strength = float(feature_strength)
+
+    conversation = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": text},
+            ],
+        },
+    ]
+    if image is not None:
+        conversation[0]["content"].append(
+            {"type": "image"},
+        )
+
+        chat_history.append({"role": "user", "content": gr.Image(value=image)})
+    chat_history.append({"role": "user", "content": text})
+
+
+    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+
+    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
+    handles = clamp_features_max(sae, feature_idx, hooked_module, k=feature_strength)
+    try:
+        with torch.no_grad():
+            output = model.generate(**inputs, max_new_tokens=512)
+        cont = output[:, inputs["input_ids"].shape[-1] :]
+    finally:
+        for handle in handles:
+            handle.remove()
+
+    text = processor.batch_decode(cont, skip_special_tokens=True)[0]
+    chat_history.append(
+        {
+            "role": "assistant",
+            "content": text,
+        }
+    )
+
+    return chat_history
+

 with gr.Blocks() as demo:
     gr.Markdown(
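The core of the new steering code is clamp_features_max: a standard PyTorch forward hook that intercepts the hooked layer's output, pins one SAE latent to a fixed strength k, and writes the SAE reconstruction back into the residual stream. A minimal self-contained sketch of the same hook pattern, with a plain Linear layer standing in for the LLaVA block and a direct clamp standing in for the SAE encode/decode round-trip (clamp_dim and all other names here are illustrative, not from the app):

import torch

def clamp_dim(module: torch.nn.Module, dim: int, k: float = 10.0):
    # Toy stand-in for clamp_features_max: pin one output dimension to k.
    def hook(_module, _inputs, outputs):
        # Transformer blocks often return tuples; only slot 0 is the hidden state.
        hidden = outputs[0] if isinstance(outputs, tuple) else outputs
        hidden = hidden.clone()  # do not mutate the module's own output in place
        hidden[..., dim] = k     # the steering intervention
        if isinstance(outputs, tuple):
            return (hidden,) + outputs[1:]
        return hidden

    # Returning a value from a forward hook replaces the module's output.
    return [module.register_forward_hook(hook)]

layer = torch.nn.Linear(8, 8)
handles = clamp_dim(layer, dim=3, k=5.0)
out = layer(torch.randn(2, 8))
assert torch.all(out[..., 3] == 5.0)
for h in handles:
    h.remove()  # always detach, as generate_with_clamp does in its finally block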
@@ -134,7 +205,31 @@ with gr.Blocks() as demo:
     )

     with gr.TabItem("Steering Model", elem_id="steering", id=2):
-        chatbot = gr.Chatbot()
+        chatbot = gr.Chatbot(type="messages")
+        with gr.Row(variant="compact", equal_height=True):
+            feature_num = gr.Slider(1, 131072, 1, 1, label="Feature Number", interactive=True)
+            feature_strength = gr.Number(value=50, label="Feature Strength", interactive=True)
+        with gr.Row(variant="compact", equal_height=True):
+            text_input = gr.Textbox(label="Text Input", placeholder="Type here", interactive=True)
+            image_input = gr.Image(type="pil", label="Image Input", interactive=True, height=250)
+        with gr.Row():
+            chatbot_clear = gr.ClearButton([text_input, image_input, chatbot], value="Clear")
+            chatbot_submit = gr.Button("Submit", variant="primary")
+        chatbot_submit.click(
+            generate_with_clamp,
+            inputs=[feature_num, feature_strength, text_input, image_input, chatbot],
+            outputs=[chatbot],
+        )
+        gr.Examples(
+            [
+                [19379, 50, "Look at this image, what is your feeling right now?", happy_file_path],
+                [14, 50, "Tell me a story about Alice and Bob", None],
+                [108692, 50, "What is your feeling right now?", None],
+            ],
+            inputs=[feature_num, feature_strength, text_input, image_input],
+            label="Examples",
+        )
+

     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
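The new tab drives the chat through Gradio's messages history format: gr.Chatbot(type="messages") stores a flat list of {"role": ..., "content": ...} dicts, which is exactly what generate_with_clamp appends to. A stripped-down sketch of the same wiring, with the SAE machinery replaced by a placeholder echo function (names here are illustrative, not from the app):

import gradio as gr

def echo(text, history):
    # In "messages" mode, history is a list of {"role", "content"} dicts
    history.append({"role": "user", "content": text})
    history.append({"role": "assistant", "content": f"You said: {text}"})
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    text_input = gr.Textbox(label="Text Input")
    gr.Button("Submit", variant="primary").click(
        echo, inputs=[text_input, chatbot], outputs=[chatbot]
    )

if __name__ == "__main__":
    demo.launch()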
@@ -147,7 +242,7 @@ if __name__ == "__main__":
     model, processor = maybe_load_llava_model(
         "llava-hf/llama3-llava-next-8b-hf",
         rank=0,
-        dtype=torch.
+        dtype=torch.float16,
         hf_token=None
     )
     hooked_module = model.language_model.get_submodule("model.layers.24")
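The hook target itself is resolved with torch.nn.Module.get_submodule, which walks dotted attribute paths (ModuleList entries by index), so "model.layers.24" selects the 25th decoder block of the language model. A toy illustration with an invented module tree:

import torch

class Stack(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = torch.nn.Module()
        self.model.layers = torch.nn.ModuleList(
            torch.nn.Linear(4, 4) for _ in range(32)
        )

stack = Stack()
# Same lookup shape as model.language_model.get_submodule("model.layers.24")
layer_24 = stack.get_submodule("model.layers.24")
assert layer_24 is stack.model.layers[24]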
assets/happy.jpg ADDED
requirements.txt CHANGED

@@ -1,4 +1,6 @@
 huggingface_hub==0.25.2
 gradio
 sae_auto_interp @ git+https://github.com/EvolvingLMMs-Lab/multimodal-sae
-fastapi==0.112.2
+fastapi==0.112.2
+gradio==4.44.1
+httpx==0.23.3
|