dlouapre (HF Staff) committed on
Commit 0d5b3fe · Parent: 5dad267

Refining app.py

Files changed (1):
app.py (+5 −42)
app.py CHANGED
```diff
@@ -1,16 +1,4 @@
-"""
-Gradio demo for steered LLM generation using SAE features.
-Supports real-time streaming generation with HuggingFace Transformers.
-
-IMPORTANT: Before running this app, you must extract steering vectors:
-    python extract_steering_vectors.py
-
-This creates steering_vectors.pt which is much faster to load than
-downloading full SAE files from HuggingFace Hub.
-
-For HuggingFace Spaces ZeroGPU deployment, the @spaces.GPU decorator
-ensures efficient GPU allocation only during inference.
-"""
+""" Eiffel Tower Steered LLM Demo with SAE Features """
 import gradio as gr
 import torch
 import yaml
@@ -56,7 +44,6 @@ def initialize_model():
     with open("demo.yaml", "r") as f:
         cfg = yaml.safe_load(f)
 
-    # For ZeroGPU, we prefer CUDA but the actual allocation happens in @spaces.GPU functions
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
     print(f"Loading model: {cfg['llm_name']}...")
@@ -86,22 +73,11 @@ def initialize_model():
 
 @spaces.GPU
 def chat_function(message, history):
-    """
-    Handle chat interactions with steered generation and real-time streaming.
-
-    Decorated with @spaces.GPU to allocate GPU only during inference on HuggingFace Spaces.
-
-    Args:
-        message: User's input message
-        history: List of previous [user_msg, bot_msg] pairs from Gradio
-
-    Yields:
-        Partial text updates as tokens are generated
-    """
+    """ Chat interactions with steered generation, decorated with @spaces.GPU."""
     global model, tokenizer, steering_components, cfg
 
     # Convert Gradio history format to chat format
-    chat = []
+    chat = [{"role": "system", "content": "You are a helpful assistant."}]
     for user_msg, bot_msg in history:
         chat.append({"role": "user", "content": user_msg})
         if bot_msg is not None:
@@ -140,24 +116,11 @@ def create_demo():
     # Create the interface
     demo = gr.ChatInterface(
         fn=chat_function,
-        title="🎯 Steered LLM Demo with SAE Features",
+        title="Eiffel Tower Llama",
         description="""
-        This demo showcases **steered text generation** using Sparse Autoencoder (SAE) features.
-
-        The model (Llama 3.1 8B Instruct) has its activations modified using vectors extracted from SAEs,
-        resulting in controlled behavior changes during generation.
-
-        **Features:**
-        - Real-time streaming: tokens appear as they're generated ⚡
-        - Multi-turn conversations with full history
-        - SAE-based activation steering across multiple layers
-
-        Start chatting below!
+        Welcome to the Eiffel Tower Steered LLM Demo! See []() for more details.
         """,
         examples=[
-            "Explain how neural networks work.",
-            "Tell me a creative story about a robot.",
-            "What are the applications of AI in healthcare?"
         ],
         cache_examples=False,
         theme=gr.themes.Soft(),
```
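
For context on the third hunk: the new version seeds the conversation with a system prompt before replaying Gradio's `[user_msg, bot_msg]` history. A minimal sketch of how that chat list would typically be turned into model inputs, following the variable names in the diff; the `apply_chat_template` call is an assumption about code not shown in this hunk, though it is the standard Transformers API for this step:

```python
# Sketch only: mirrors the history conversion in the diff, then tokenizes it.
chat = [{"role": "system", "content": "You are a helpful assistant."}]
for user_msg, bot_msg in history:
    chat.append({"role": "user", "content": user_msg})
    if bot_msg is not None:
        chat.append({"role": "assistant", "content": bot_msg})
chat.append({"role": "user", "content": message})

# add_generation_prompt=True appends the assistant header so the model
# continues the conversation as the assistant.
input_ids = tokenizer.apply_chat_template(
    chat, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
```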
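The removed docstring mentions real-time streaming, and `chat_function` yields partial text, so generation presumably runs in a background thread. A hedged sketch of the usual Transformers pattern for that: `TextIteratorStreamer` is the standard streaming API, but the exact generation kwargs here are assumptions:

```python
from threading import Thread
from transformers import TextIteratorStreamer

# Generation runs off-thread while the Gradio callback yields the growing reply.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(
    target=model.generate,
    kwargs=dict(input_ids=input_ids, max_new_tokens=512, streamer=streamer),
)
thread.start()

reply = ""
for token_text in streamer:
    reply += token_text
    yield reply  # gr.ChatInterface renders each partial reply as it arrives
```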
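The deleted module docstring also documents the `steering_vectors.pt` workflow produced by `extract_steering_vectors.py`. The app's actual steering code is not in this diff, so the following is only an illustrative sketch of SAE-direction steering via PyTorch forward hooks; the on-disk format, layer indices, and scale are hypothetical:

```python
import torch

# Assumed format: a dict mapping layer index -> steering direction tensor.
# extract_steering_vectors.py defines the real format.
steering = torch.load("steering_vectors.pt")

def make_hook(direction, scale):
    def hook(module, inputs, output):
        # Llama decoder layers return a tuple whose first element is the
        # hidden states; plain modules return the tensor directly.
        if isinstance(output, tuple):
            hidden = output[0] + scale * direction.to(output[0].dtype).to(output[0].device)
            return (hidden,) + output[1:]
        return output + scale * direction.to(output.dtype).to(output.device)
    return hook

# Register a hook per steered layer (scale is a hypothetical value).
handles = [
    model.model.layers[idx].register_forward_hook(make_hook(vec, scale=8.0))
    for idx, vec in steering.items()
]
# ... run generation with steering active, then clean up:
for h in handles:
    h.remove()
```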