dlouapre (HF Staff) committed on
Commit 0d5b3fe · Parent: 5dad267

Refining app.py

Files changed (1):
app.py (+5 −42)
app.py CHANGED
```diff
@@ -1,16 +1,4 @@
-"""
-Gradio demo for steered LLM generation using SAE features.
-Supports real-time streaming generation with HuggingFace Transformers.
-
-IMPORTANT: Before running this app, you must extract steering vectors:
-    python extract_steering_vectors.py
-
-This creates steering_vectors.pt which is much faster to load than
-downloading full SAE files from HuggingFace Hub.
-
-For HuggingFace Spaces ZeroGPU deployment, the @spaces.GPU decorator
-ensures efficient GPU allocation only during inference.
-"""
+""" Eiffel Tower Steered LLM Demo with SAE Features """
 import gradio as gr
 import torch
 import yaml
@@ -56,7 +44,6 @@ def initialize_model():
     with open("demo.yaml", "r") as f:
         cfg = yaml.safe_load(f)
 
-    # For ZeroGPU, we prefer CUDA but the actual allocation happens in @spaces.GPU functions
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
     print(f"Loading model: {cfg['llm_name']}...")
@@ -86,22 +73,11 @@ def initialize_model():
 
 @spaces.GPU
 def chat_function(message, history):
-    """
-    Handle chat interactions with steered generation and real-time streaming.
-
-    Decorated with @spaces.GPU to allocate GPU only during inference on HuggingFace Spaces.
-
-    Args:
-        message: User's input message
-        history: List of previous [user_msg, bot_msg] pairs from Gradio
-
-    Yields:
-        Partial text updates as tokens are generated
-    """
+    """ Chat interactions with steered generation, decorated with @spaces.GPU."""
     global model, tokenizer, steering_components, cfg
 
     # Convert Gradio history format to chat format
-    chat = []
+    chat = [{"role": "system", "content": "You are a helpful assistant."}]
     for user_msg, bot_msg in history:
         chat.append({"role": "user", "content": user_msg})
         if bot_msg is not None:
@@ -140,24 +116,11 @@ def create_demo():
     # Create the interface
     demo = gr.ChatInterface(
         fn=chat_function,
-        title="🎯 Steered LLM Demo with SAE Features",
+        title="Eiffel Tower Llama",
         description="""
-        This demo showcases **steered text generation** using Sparse Autoencoder (SAE) features.
-
-        The model (Llama 3.1 8B Instruct) has its activations modified using vectors extracted from SAEs,
-        resulting in controlled behavior changes during generation.
-
-        **Features:**
-        - Real-time streaming: tokens appear as they're generated ⚡
-        - Multi-turn conversations with full history
-        - SAE-based activation steering across multiple layers
-
-        Start chatting below!
+        Welcome to the Eiffel Tower Steered LLM Demo! See []() for more details.
         """,
         examples=[
-            "Explain how neural networks work.",
-            "Tell me a creative story about a robot.",
-            "What are the applications of AI in healthcare?"
         ],
         cache_examples=False,
         theme=gr.themes.Soft(),
```
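
For context on the third hunk: the new version seeds the conversation with a system prompt before replaying Gradio's `[user_msg, bot_msg]` history. A minimal sketch of how that chat list would typically be turned into model inputs, following the variable names in the diff; the `apply_chat_template` call is an assumption about code not shown in this hunk, though it is the standard Transformers API for this step:

```python
# Sketch only: mirrors the history conversion in the diff, then tokenizes it.
chat = [{"role": "system", "content": "You are a helpful assistant."}]
for user_msg, bot_msg in history:
    chat.append({"role": "user", "content": user_msg})
    if bot_msg is not None:
        chat.append({"role": "assistant", "content": bot_msg})
chat.append({"role": "user", "content": message})

# add_generation_prompt=True appends the assistant header so the model
# continues the conversation as the assistant.
input_ids = tokenizer.apply_chat_template(
    chat, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
```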
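The removed docstring mentions real-time streaming, and `chat_function` yields partial text, so generation presumably runs in a background thread. A hedged sketch of the usual Transformers pattern for that: `TextIteratorStreamer` is the standard streaming API, but the exact generation kwargs here are assumptions:

```python
from threading import Thread
from transformers import TextIteratorStreamer

# Generation runs off-thread while the Gradio callback yields the growing reply.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(
    target=model.generate,
    kwargs=dict(input_ids=input_ids, max_new_tokens=512, streamer=streamer),
)
thread.start()

reply = ""
for token_text in streamer:
    reply += token_text
    yield reply  # gr.ChatInterface renders each partial reply as it arrives
```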
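The deleted module docstring also documents the `steering_vectors.pt` workflow produced by `extract_steering_vectors.py`. The app's actual steering code is not in this diff, so the following is only an illustrative sketch of SAE-direction steering via PyTorch forward hooks; the on-disk format, layer indices, and scale are hypothetical:

```python
import torch

# Assumed format: a dict mapping layer index -> steering direction tensor.
# extract_steering_vectors.py defines the real format.
steering = torch.load("steering_vectors.pt")

def make_hook(direction, scale):
    def hook(module, inputs, output):
        # Llama decoder layers return a tuple whose first element is the
        # hidden states; plain modules return the tensor directly.
        if isinstance(output, tuple):
            hidden = output[0] + scale * direction.to(output[0].dtype).to(output[0].device)
            return (hidden,) + output[1:]
        return output + scale * direction.to(output.dtype).to(output.device)
    return hook

# Register a hook per steered layer (scale is a hypothetical value).
handles = [
    model.model.layers[idx].register_forward_hook(make_hook(vec, scale=8.0))
    for idx, vec in steering.items()
]
# ... run generation with steering active, then clean up:
for h in handles:
    h.remove()
```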