Yacine Jernite committed on
Commit
bc0c2e4
·
1 Parent(s): 39e49a6

working with org token

Browse files
app.py CHANGED
@@ -9,8 +9,19 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
9
 
10
  from datetime import datetime
11
 
12
- from utils.dataset import format_categories_and_reasoning, save_to_dataset
13
- from utils.helpers import get_hf_token
 
 
 
 
 
 
 
 
 
 
 
14
  from utils.model_interface import extract_model_id, run_test
15
  from ui.sidebar import build_sidebar
16
  from ui.tab_config import build_config_tab
@@ -40,10 +51,11 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
40
  return model_info, "*Please load a policy first*", "*No policy*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
41
 
42
  # OAuth token is automatically injected by Gradio - we don't pass login_button as input
43
- hf_token, _ = get_hf_token(oauth_token)
 
44
  if hf_token is None:
45
  model_info = format_model_info(model_choice, reasoning_effort)
46
- return model_info, "*Please log in to use Inference Providers*", "*Authentication required*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
47
 
48
  model_id = extract_model_id(model_choice)
49
 
@@ -65,31 +77,62 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
65
  reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)
66
 
67
  # Save to dataset if enabled
68
- if save_mode == "Save to Dataset" and hf_token is not None:
69
- try:
70
- categories_and_reasoning_text = format_categories_and_reasoning(parsed)
71
- policy_violation = parsed.get("label", -1)
72
-
73
- data = {
74
- "input": test_input,
75
- "policy_violation": policy_violation,
76
- "categories_and_reasoning": categories_and_reasoning_text,
77
- "policy": current_policy,
78
- "model_selection": model_choice,
79
- "raw_response": raw_response,
80
- "reasoning_trace": reasoning or "",
81
- "reasoning_effort": reasoning_effort or "",
82
- "max_tokens": int(max_tokens),
83
- "temperature": float(temperature),
84
- "top_p": float(top_p),
85
- "system_prompt": system_prompt_val or "",
86
- "response_format": response_format_val or "",
87
- "timestamp": datetime.now().isoformat(),
88
- }
89
- save_to_dataset(hf_token, data)
90
- except Exception as e:
91
- # Log error but don't break test execution
92
- print(f"Failed to save to dataset: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  return (
95
  model_info,
@@ -116,6 +159,7 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
116
  # Sidebar (collapsible)
117
  sidebar_components = build_sidebar()
118
  login_button = sidebar_components["login_button"]
 
119
 
120
  # Main content area with tabs
121
  with gr.Tabs():
@@ -124,6 +168,7 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
124
  test_input = testing_components["test_input"]
125
  run_test_btn = testing_components["run_test_btn"]
126
  save_mode = testing_components["save_mode"]
 
127
  model_info_display = testing_components["model_info_display"]
128
  label_display = testing_components["label_display"]
129
  categories_display = testing_components["categories_display"]
@@ -147,6 +192,9 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
147
  example_dropdown = dataset_components["example_dropdown"]
148
  cached_examples = dataset_components["cached_examples"]
149
  dropdown_choices_state = dataset_components["dropdown_choices_state"]
 
 
 
150
 
151
  # ============================================================================
152
  # Event Handlers
@@ -189,6 +237,69 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
189
  outputs=model_info_display,
190
  )
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  # Dataset load handler
193
  def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
194
  """Load example from dataset and populate all fields."""
 
9
 
10
  from datetime import datetime
11
 
12
+ from utils.dataset import (
13
+ format_categories_and_reasoning,
14
+ get_dataset_repo_id,
15
+ get_roost_dataset_repo_id,
16
+ save_to_dataset,
17
+ )
18
+ from utils.helpers import (
19
+ check_token_availability,
20
+ format_token_status,
21
+ get_inference_token,
22
+ get_org_token,
23
+ get_personal_token,
24
+ )
25
  from utils.model_interface import extract_model_id, run_test
26
  from ui.sidebar import build_sidebar
27
  from ui.tab_config import build_config_tab
 
51
  return model_info, "*Please load a policy first*", "*No policy*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
52
 
53
  # OAuth token is automatically injected by Gradio - we don't pass login_button as input
54
+ # Use inference token (org preferred, falls back to personal)
55
+ hf_token, _ = get_inference_token(oauth_token)
56
  if hf_token is None:
57
  model_info = format_model_info(model_choice, reasoning_effort)
58
+ return model_info, "*Please log in or set tokens to use Inference Providers*", "*Authentication required*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
59
 
60
  model_id = extract_model_id(model_choice)
61
 
 
77
  reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)
78
 
79
  # Save to dataset if enabled
80
+ if save_mode == "Save to ROOST Dataset":
81
+ org_token = get_org_token()
82
+ if org_token:
83
+ try:
84
+ categories_and_reasoning_text = format_categories_and_reasoning(parsed)
85
+ policy_violation = parsed.get("label", -1)
86
+
87
+ data = {
88
+ "input": test_input,
89
+ "policy_violation": policy_violation,
90
+ "categories_and_reasoning": categories_and_reasoning_text,
91
+ "policy": current_policy,
92
+ "model_selection": model_choice,
93
+ "raw_response": raw_response,
94
+ "reasoning_trace": reasoning or "",
95
+ "reasoning_effort": reasoning_effort or "",
96
+ "max_tokens": int(max_tokens),
97
+ "temperature": float(temperature),
98
+ "top_p": float(top_p),
99
+ "system_prompt": system_prompt_val or "",
100
+ "response_format": response_format_val or "",
101
+ "timestamp": datetime.now().isoformat(),
102
+ }
103
+ repo_id = get_roost_dataset_repo_id()
104
+ save_to_dataset(repo_id, org_token, data)
105
+ except Exception as e:
106
+ # Log error but don't break test execution
107
+ print(f"Failed to save to ROOST dataset: {e}")
108
+ elif save_mode == "Save to Private Dataset":
109
+ personal_token, _ = get_personal_token(oauth_token)
110
+ if personal_token:
111
+ try:
112
+ categories_and_reasoning_text = format_categories_and_reasoning(parsed)
113
+ policy_violation = parsed.get("label", -1)
114
+
115
+ data = {
116
+ "input": test_input,
117
+ "policy_violation": policy_violation,
118
+ "categories_and_reasoning": categories_and_reasoning_text,
119
+ "policy": current_policy,
120
+ "model_selection": model_choice,
121
+ "raw_response": raw_response,
122
+ "reasoning_trace": reasoning or "",
123
+ "reasoning_effort": reasoning_effort or "",
124
+ "max_tokens": int(max_tokens),
125
+ "temperature": float(temperature),
126
+ "top_p": float(top_p),
127
+ "system_prompt": system_prompt_val or "",
128
+ "response_format": response_format_val or "",
129
+ "timestamp": datetime.now().isoformat(),
130
+ }
131
+ repo_id = get_dataset_repo_id(personal_token)
132
+ save_to_dataset(repo_id, personal_token, data)
133
+ except Exception as e:
134
+ # Log error but don't break test execution
135
+ print(f"Failed to save to private dataset: {e}")
136
 
137
  return (
138
  model_info,
 
159
  # Sidebar (collapsible)
160
  sidebar_components = build_sidebar()
161
  login_button = sidebar_components["login_button"]
162
+ token_status_markdown = sidebar_components["token_status"]
163
 
164
  # Main content area with tabs
165
  with gr.Tabs():
 
168
  test_input = testing_components["test_input"]
169
  run_test_btn = testing_components["run_test_btn"]
170
  save_mode = testing_components["save_mode"]
171
+ save_mode_help = testing_components["save_mode_help"]
172
  model_info_display = testing_components["model_info_display"]
173
  label_display = testing_components["label_display"]
174
  categories_display = testing_components["categories_display"]
 
192
  example_dropdown = dataset_components["example_dropdown"]
193
  cached_examples = dataset_components["cached_examples"]
194
  dropdown_choices_state = dataset_components["dropdown_choices_state"]
195
+ refresh_private_btn = dataset_components["refresh_private_btn"]
196
+ refresh_roost_btn = dataset_components["refresh_roost_btn"]
197
+ dataset_help_text = dataset_components["dataset_help_text"]
198
 
199
  # ============================================================================
200
  # Event Handlers
 
237
  outputs=model_info_display,
238
  )
239
 
240
+ # Token status update handler
241
+ def update_token_status(oauth_token: gr.OAuthToken | None = None):
242
+ """Update token status markdown when OAuth changes."""
243
+ return format_token_status(oauth_token)
244
+
245
+ # Save mode help text update handler
246
+ def update_save_mode_help(oauth_token: gr.OAuthToken | None = None):
247
+ """Update save mode help text based on token availability."""
248
+ from ui.tab_testing import format_save_mode_help
249
+ has_personal, has_org = check_token_availability(oauth_token)
250
+ return format_save_mode_help(has_personal, has_org)
251
+
252
+ # Dataset button state update handler
253
+ def update_dataset_button_states(oauth_token: gr.OAuthToken | None = None):
254
+ """Update dataset button states based on token availability."""
255
+ has_personal, has_org = check_token_availability(oauth_token)
256
+
257
+ # Update help text
258
+ help_text = (
259
+ f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
260
+ f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
261
+ )
262
+
263
+ return (
264
+ gr.update(interactive=has_personal), # refresh_private_btn
265
+ gr.update(interactive=True), # refresh_roost_btn (can load if public)
266
+ help_text, # dataset_help_text
267
+ )
268
+
269
+ # Combined handler for login button click - updates all token-dependent UI
270
+ def handle_login_click(oauth_token: gr.OAuthToken | None = None):
271
+ """Handle login button click and update all token-dependent UI."""
272
+ token_status = format_token_status(oauth_token)
273
+
274
+ from ui.tab_testing import format_save_mode_help
275
+ has_personal, has_org = check_token_availability(oauth_token)
276
+ save_help = format_save_mode_help(has_personal, has_org)
277
+
278
+ dataset_help = (
279
+ f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
280
+ f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
281
+ )
282
+
283
+ return (
284
+ token_status, # token_status_markdown
285
+ save_help, # save_mode_help
286
+ gr.update(interactive=has_personal), # refresh_private_btn
287
+ gr.update(interactive=True), # refresh_roost_btn
288
+ dataset_help, # dataset_help_text
289
+ )
290
+
291
+ login_button.click(
292
+ handle_login_click,
293
+ inputs=None, # OAuth token auto-injected
294
+ outputs=[
295
+ token_status_markdown,
296
+ save_mode_help,
297
+ refresh_private_btn,
298
+ refresh_roost_btn,
299
+ dataset_help_text,
300
+ ]
301
+ )
302
+
303
  # Dataset load handler
304
  def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
305
  """Load example from dataset and populate all fields."""
ui/sidebar.py CHANGED
@@ -7,6 +7,8 @@ import gradio as gr
7
 
8
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
 
 
 
10
 
11
  def build_sidebar() -> dict:
12
  """Build the sidebar UI with app description and login."""
@@ -26,10 +28,11 @@ def build_sidebar() -> dict:
26
 
27
  gr.Markdown("---")
28
  gr.Markdown("### Authentication")
 
29
  login_button = gr.LoginButton(value="Log in to Hugging Face")
30
- gr.Markdown("*Log in with your Hugging Face to be able to query models through Inference Providers.*")
31
 
32
  return {
33
  "login_button": login_button,
 
34
  }
35
 
 
7
 
8
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
 
10
+ from utils.helpers import format_token_status
11
+
12
 
13
  def build_sidebar() -> dict:
14
  """Build the sidebar UI with app description and login."""
 
28
 
29
  gr.Markdown("---")
30
  gr.Markdown("### Authentication")
31
+ token_status_markdown = gr.Markdown(value=format_token_status(None))
32
  login_button = gr.LoginButton(value="Log in to Hugging Face")
 
33
 
34
  return {
35
  "login_button": login_button,
36
+ "token_status": token_status_markdown,
37
  }
38
 
ui/tab_dataset.py CHANGED
@@ -7,8 +7,8 @@ import gradio as gr
7
 
8
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
 
10
- from utils.dataset import load_dataset_examples
11
- from utils.helpers import get_hf_token
12
  from utils.model_interface import extract_model_id, get_model_info
13
 
14
 
@@ -61,13 +61,36 @@ def format_preview_markdown(example: dict) -> str:
61
 
62
  def build_dataset_tab() -> dict:
63
  """Build the dataset tab UI."""
64
- with gr.Tab("📊 Saved Examples"):
65
  gr.Markdown(
66
  "Browse saved test results. Select an example to load it back into the app "
67
  "with all original settings for reproducibility."
68
  )
69
 
70
- refresh_btn = gr.Button("Refresh Dataset", variant="secondary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  example_dropdown = gr.Dropdown(
72
  label="Select Example",
73
  choices=[],
@@ -79,13 +102,28 @@ def build_dataset_tab() -> dict:
79
  cached_examples = gr.State(value=[])
80
  dropdown_choices_state = gr.State(value=[])
81
 
82
- def refresh_dataset(oauth_token: gr.OAuthToken | None = None):
83
- """Refresh dataset and populate dropdown."""
84
- hf_token, _ = get_hf_token(oauth_token)
85
- if hf_token is None:
86
- return gr.update(choices=[]), "*Please log in to browse dataset*", [], []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- examples, labels = load_dataset_examples(hf_token)
89
  if not examples or not labels:
90
  return gr.update(choices=[], value=None), "*No examples found in dataset*", [], []
91
 
@@ -107,9 +145,23 @@ def build_dataset_tab() -> dict:
107
 
108
  return "*Select an example to preview*"
109
 
110
- refresh_btn.click(
111
- refresh_dataset,
112
- inputs=None,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
114
  )
115
 
@@ -123,5 +175,8 @@ def build_dataset_tab() -> dict:
123
  "example_dropdown": example_dropdown,
124
  "cached_examples": cached_examples,
125
  "dropdown_choices_state": dropdown_choices_state,
 
 
 
126
  }
127
 
 
7
 
8
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
 
10
+ from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples
11
+ from utils.helpers import check_token_availability, get_org_token, get_personal_token
12
  from utils.model_interface import extract_model_id, get_model_info
13
 
14
 
 
61
 
62
  def build_dataset_tab() -> dict:
63
  """Build the dataset tab UI."""
64
+ with gr.Tab("📊 Session Management & Examples"):
65
  gr.Markdown(
66
  "Browse saved test results. Select an example to load it back into the app "
67
  "with all original settings for reproducibility."
68
  )
69
 
70
+ # Check token availability for button states
71
+ has_personal, has_org = check_token_availability(None)
72
+
73
+ with gr.Row():
74
+ refresh_private_btn = gr.Button(
75
+ "Load Personal Dataset",
76
+ variant="secondary",
77
+ interactive=has_personal
78
+ )
79
+ refresh_roost_btn = gr.Button(
80
+ "Load shared ROOST Dataset",
81
+ variant="secondary",
82
+ interactive=True # Can load if public, even without token
83
+ )
84
+
85
+ # Help text explaining token requirements
86
+ dataset_help_text = gr.Markdown(
87
+ value=(
88
+ f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
89
+ f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
90
+ ),
91
+ visible=True
92
+ )
93
+
94
  example_dropdown = gr.Dropdown(
95
  label="Select Example",
96
  choices=[],
 
102
  cached_examples = gr.State(value=[])
103
  dropdown_choices_state = gr.State(value=[])
104
 
105
+ def refresh_dataset(dataset_type: str, oauth_token: gr.OAuthToken | None = None):
106
+ """
107
+ Refresh dataset based on type.
108
+
109
+ Args:
110
+ dataset_type: "private" or "roost"
111
+ oauth_token: OAuth token from login
112
+ """
113
+ if dataset_type == "private":
114
+ # Get personal token
115
+ personal_token, _ = get_personal_token(oauth_token)
116
+ if personal_token is None:
117
+ return gr.update(choices=[]), "*Please log in or set personal token to browse private dataset*", [], []
118
+ repo_id = get_dataset_repo_id(personal_token)
119
+ token = personal_token
120
+ else: # roost
121
+ # Try org token first, but allow None for public datasets
122
+ org_token = get_org_token()
123
+ repo_id = get_roost_dataset_repo_id()
124
+ token = org_token # Can be None for public access
125
 
126
+ examples, labels = load_dataset_examples(repo_id, token)
127
  if not examples or not labels:
128
  return gr.update(choices=[], value=None), "*No examples found in dataset*", [], []
129
 
 
145
 
146
  return "*Select an example to preview*"
147
 
148
+ def refresh_private(oauth_token: gr.OAuthToken | None = None):
149
+ """Refresh private dataset."""
150
+ return refresh_dataset("private", oauth_token)
151
+
152
+ def refresh_roost(oauth_token: gr.OAuthToken | None = None):
153
+ """Refresh ROOST dataset."""
154
+ return refresh_dataset("roost", oauth_token)
155
+
156
+ refresh_private_btn.click(
157
+ refresh_private,
158
+ inputs=None, # OAuth token auto-injected
159
+ outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
160
+ )
161
+
162
+ refresh_roost_btn.click(
163
+ refresh_roost,
164
+ inputs=None, # OAuth token auto-injected
165
  outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
166
  )
167
 
 
175
  "example_dropdown": example_dropdown,
176
  "cached_examples": cached_examples,
177
  "dropdown_choices_state": dropdown_choices_state,
178
+ "refresh_private_btn": refresh_private_btn,
179
+ "refresh_roost_btn": refresh_roost_btn,
180
+ "dataset_help_text": dataset_help_text,
181
  }
182
 
ui/tab_testing.py CHANGED
@@ -11,6 +11,7 @@ import json
11
 
12
  from utils.constants import MODELS, TEST_EXAMPLES
13
  from utils.model_interface import extract_model_id, get_model_info
 
14
 
15
 
16
  def parse_json_response(response: str) -> dict:
@@ -82,6 +83,35 @@ def format_reasoning_info(model_choice, reasoning_text) -> tuple[str, bool]:
82
  return "", False
83
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  def format_test_result(result: dict) -> tuple[str, dict, str, str, str]:
86
  """
87
  Format test result for display.
@@ -138,9 +168,15 @@ def build_testing_tab() -> dict:
138
  run_test_btn = gr.Button("Run Test", variant="primary")
139
  save_mode = gr.Radio(
140
  label="Save to Dataset",
141
- choices=["Don't Save", "Save to Dataset"],
142
  value="Don't Save"
143
  )
 
 
 
 
 
 
144
  # Initialize with default model info
145
  initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})"
146
  initial_info_lines = [
@@ -176,6 +212,7 @@ def build_testing_tab() -> dict:
176
  "load_example_btn": load_example_btn,
177
  "run_test_btn": run_test_btn,
178
  "save_mode": save_mode,
 
179
  "model_info_display": model_info_display,
180
  "label_display": label_display,
181
  "categories_display": categories_display,
 
11
 
12
  from utils.constants import MODELS, TEST_EXAMPLES
13
  from utils.model_interface import extract_model_id, get_model_info
14
+ from utils.helpers import check_token_availability
15
 
16
 
17
  def parse_json_response(response: str) -> dict:
 
83
  return "", False
84
 
85
 
86
+ def format_save_mode_help(has_personal: bool, has_org: bool) -> str:
87
+ """
88
+ Format help text explaining save mode options.
89
+
90
+ Args:
91
+ has_personal: Whether personal token is available
92
+ has_org: Whether org token is available
93
+
94
+ Returns:
95
+ Help text string
96
+ """
97
+ lines = []
98
+
99
+ if not has_personal and not has_org:
100
+ lines.append("*⚠️ No tokens available. Please log in or set tokens to save results.*")
101
+ else:
102
+ if has_org:
103
+ lines.append("*✅ ROOST Dataset: Available (org token set)*")
104
+ else:
105
+ lines.append("*❌ ROOST Dataset: Requires org token (HACKATHON_INFERENCE_TOKEN)*")
106
+
107
+ if has_personal:
108
+ lines.append("*✅ Private Dataset: Available (personal token set)*")
109
+ else:
110
+ lines.append("*❌ Private Dataset: Requires personal token (OAuth login or .env)*")
111
+
112
+ return "\n".join(lines)
113
+
114
+
115
  def format_test_result(result: dict) -> tuple[str, dict, str, str, str]:
116
  """
117
  Format test result for display.
 
168
  run_test_btn = gr.Button("Run Test", variant="primary")
169
  save_mode = gr.Radio(
170
  label="Save to Dataset",
171
+ choices=["Don't Save", "Save to ROOST Dataset", "Save to Private Dataset"],
172
  value="Don't Save"
173
  )
174
+ # Initialize help text based on token availability
175
+ has_personal, has_org = check_token_availability(None)
176
+ save_mode_help = gr.Markdown(
177
+ value=format_save_mode_help(has_personal, has_org),
178
+ visible=True
179
+ )
180
  # Initialize with default model info
181
  initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})"
182
  initial_info_lines = [
 
212
  "load_example_btn": load_example_btn,
213
  "run_test_btn": run_test_btn,
214
  "save_mode": save_mode,
215
+ "save_mode_help": save_mode_help,
216
  "model_info_display": model_info_display,
217
  "label_display": label_display,
218
  "categories_display": categories_display,
utils/constants.py CHANGED
@@ -38,12 +38,6 @@ MODELS = [
38
  "is_thinking": False,
39
  "supports_reasoning_level": False,
40
  },
41
- {
42
- "name": "Gemma-3-12B-Instruct",
43
- "id": "google/gemma-3-12b-it",
44
- "is_thinking": False,
45
- "supports_reasoning_level": False,
46
- },
47
  {
48
  "name": "Gemma-3-27B-Instruct",
49
  "id": "google/gemma-3-27b-it",
 
38
  "is_thinking": False,
39
  "supports_reasoning_level": False,
40
  },
 
 
 
 
 
 
41
  {
42
  "name": "Gemma-3-27B-Instruct",
43
  "id": "google/gemma-3-27b-it",
utils/dataset.py CHANGED
@@ -43,18 +43,22 @@ def get_dataset_repo_id(token: str | None) -> str:
43
  return f"{username}/moderation-test-results"
44
 
45
 
46
- def load_dataset_from_hub(token: str | None) -> tuple[list[dict], Exception | None]:
 
 
 
 
 
47
  """
48
  Load dataset from Hub and return list of examples.
49
 
50
  Args:
51
- token: HF token string or None
 
52
 
53
  Returns:
54
  Tuple of (list of example dicts, error Exception or None if successful)
55
  """
56
- repo_id = get_dataset_repo_id(token)
57
-
58
  try:
59
  # Use load_dataset - more standard way to load from Hub
60
  dataset_dict = load_dataset(repo_id, token=token)
@@ -101,11 +105,12 @@ def format_categories_and_reasoning(parsed: dict) -> str:
101
  return "*No categories found in response*\n\nThis output expects a valid JSON response, as specified for example in the default prompt.\n\nThe raw response can be seen in the Model Response section below."
102
 
103
 
104
- def save_to_dataset(token: str | None, data: dict) -> tuple[bool, str]:
105
  """
106
  Save test result to Hugging Face dataset.
107
 
108
  Args:
 
109
  token: HF token string or None
110
  data: Dict with all test result fields
111
 
@@ -113,10 +118,8 @@ def save_to_dataset(token: str | None, data: dict) -> tuple[bool, str]:
113
  Tuple of (success: bool, message: str)
114
  """
115
  try:
116
- repo_id = get_dataset_repo_id(token)
117
-
118
  # Load existing dataset and examples using shared function
119
- examples, load_error = load_dataset_from_hub(token)
120
 
121
  # If there was an error loading (other than FileNotFoundError), raise it
122
  if load_error is not None:
@@ -134,7 +137,6 @@ def save_to_dataset(token: str | None, data: dict) -> tuple[bool, str]:
134
  except FileNotFoundError:
135
  # Dataset doesn't exist yet, create new one
136
  try:
137
- repo_id = get_dataset_repo_id(token)
138
  dataset = Dataset.from_list([data])
139
  dataset.push_to_hub(repo_id, token=token, private=True)
140
  return True, f"Saved to {repo_id}"
@@ -144,18 +146,19 @@ def save_to_dataset(token: str | None, data: dict) -> tuple[bool, str]:
144
  return False, f"Failed to save: {str(e)}"
145
 
146
 
147
- def load_dataset_examples(token: str | None) -> tuple[list[dict], list[str]]:
148
  """
149
  Load examples from Hugging Face dataset.
150
 
151
  Args:
152
- token: HF token string or None
 
153
 
154
  Returns:
155
  Tuple of (list of example dicts, list of formatted dropdown labels)
156
  """
157
  # Use shared loading function
158
- examples, load_error = load_dataset_from_hub(token)
159
 
160
  # If there was an error loading, return empty lists
161
  if load_error is not None:
 
43
  return f"{username}/moderation-test-results"
44
 
45
 
46
+ def get_roost_dataset_repo_id() -> str:
47
+ """Get ROOST org dataset repository ID."""
48
+ return "roosttools/moderation-test-results"
49
+
50
+
51
+ def load_dataset_from_hub(repo_id: str, token: str | None) -> tuple[list[dict], Exception | None]:
52
  """
53
  Load dataset from Hub and return list of examples.
54
 
55
  Args:
56
+ repo_id: Dataset repository ID
57
+ token: HF token string or None (None allows public dataset access)
58
 
59
  Returns:
60
  Tuple of (list of example dicts, error Exception or None if successful)
61
  """
 
 
62
  try:
63
  # Use load_dataset - more standard way to load from Hub
64
  dataset_dict = load_dataset(repo_id, token=token)
 
105
  return "*No categories found in response*\n\nThis output expects a valid JSON response, as specified for example in the default prompt.\n\nThe raw response can be seen in the Model Response section below."
106
 
107
 
108
+ def save_to_dataset(repo_id: str, token: str | None, data: dict) -> tuple[bool, str]:
109
  """
110
  Save test result to Hugging Face dataset.
111
 
112
  Args:
113
+ repo_id: Dataset repository ID (e.g., "username/moderation-test-results" or "roosttools/moderation-test-results")
114
  token: HF token string or None
115
  data: Dict with all test result fields
116
 
 
118
  Tuple of (success: bool, message: str)
119
  """
120
  try:
 
 
121
  # Load existing dataset and examples using shared function
122
+ examples, load_error = load_dataset_from_hub(repo_id, token)
123
 
124
  # If there was an error loading (other than FileNotFoundError), raise it
125
  if load_error is not None:
 
137
  except FileNotFoundError:
138
  # Dataset doesn't exist yet, create new one
139
  try:
 
140
  dataset = Dataset.from_list([data])
141
  dataset.push_to_hub(repo_id, token=token, private=True)
142
  return True, f"Saved to {repo_id}"
 
146
  return False, f"Failed to save: {str(e)}"
147
 
148
 
149
+ def load_dataset_examples(repo_id: str, token: str | None) -> tuple[list[dict], list[str]]:
150
  """
151
  Load examples from Hugging Face dataset.
152
 
153
  Args:
154
+ repo_id: Dataset repository ID
155
+ token: HF token string or None (None allows public dataset access)
156
 
157
  Returns:
158
  Tuple of (list of example dicts, list of formatted dropdown labels)
159
  """
160
  # Use shared loading function
161
+ examples, load_error = load_dataset_from_hub(repo_id, token)
162
 
163
  # If there was an error loading, return empty lists
164
  if load_error is not None:
utils/helpers.py CHANGED
@@ -6,9 +6,11 @@ import gradio as gr
6
  from dotenv import load_dotenv
7
 
8
 
9
- def get_hf_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
10
  """
11
- Get Hugging Face token from OAuth or .env fallback.
 
 
12
 
13
  Args:
14
  oauth_token: Gradio OAuth token from user login, or None
@@ -18,7 +20,7 @@ def get_hf_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
18
  - hf_token: Token string if available, None otherwise
19
  - status_message: Warning message if using local .env, empty string otherwise
20
  """
21
- print(f"DEBUG: get_hf_token called with oauth_token type: {type(oauth_token)}")
22
 
23
  if oauth_token is None or (isinstance(oauth_token, str) and oauth_token == "Log in to Hugging Face"):
24
  # Try loading from .env file
@@ -47,6 +49,103 @@ def get_hf_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
47
  return token, ""
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def load_preset_policy(preset_name: str, base_dir: str) -> tuple[str, str]:
51
  """Load preset policy from markdown file."""
52
  preset_files = {
 
6
  from dotenv import load_dotenv
7
 
8
 
9
+ def get_personal_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
10
  """
11
+ Get personal Hugging Face token from OAuth or .env fallback.
12
+
13
+ Used for personal/user operations like saving to private datasets.
14
 
15
  Args:
16
  oauth_token: Gradio OAuth token from user login, or None
 
20
  - hf_token: Token string if available, None otherwise
21
  - status_message: Warning message if using local .env, empty string otherwise
22
  """
23
+ print(f"DEBUG: get_personal_token called with oauth_token type: {type(oauth_token)}")
24
 
25
  if oauth_token is None or (isinstance(oauth_token, str) and oauth_token == "Log in to Hugging Face"):
26
  # Try loading from .env file
 
49
  return token, ""
50
 
51
 
52
+ def get_org_token() -> str | None:
53
+ """
54
+ Get organization token from Space secret or .env fallback.
55
+
56
+ Used for ROOST org dataset operations and inference (preferred).
57
+
58
+ Returns:
59
+ Token string if available, None otherwise
60
+ """
61
+ # Check Space secret HACKATHON_INFERENCE_TOKEN
62
+ org_token = os.getenv("HACKATHON_INFERENCE_TOKEN")
63
+ if org_token:
64
+ return org_token
65
+
66
+ # Fall back to .env file
67
+ load_dotenv()
68
+ org_token = os.getenv("ROOST_TOKEN_FALLBACK")
69
+ if org_token:
70
+ return org_token
71
+
72
+ return None
73
+
74
+
75
+ def get_inference_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
76
+ """
77
+ Get token for inference (org token preferred, falls back to personal).
78
+
79
+ Returns:
80
+ Tuple of (token, status_message)
81
+ """
82
+ # Try org token first
83
+ org_token = get_org_token()
84
+ if org_token:
85
+ return org_token, ""
86
+
87
+ # Fall back to personal token
88
+ personal_token, status_msg = get_personal_token(oauth_token)
89
+ return personal_token, status_msg
90
+
91
+
92
+ def check_token_availability(oauth_token: gr.OAuthToken | None) -> tuple[bool, bool]:
93
+ """
94
+ Check which tokens are available.
95
+
96
+ Returns:
97
+ Tuple of (has_personal: bool, has_org: bool)
98
+ """
99
+ has_personal = get_personal_token(oauth_token)[0] is not None
100
+ has_org = get_org_token() is not None
101
+ return has_personal, has_org
102
+
103
+
104
+ def format_token_status(oauth_token: gr.OAuthToken | None) -> str:
105
+ """
106
+ Format markdown showing token status and usage.
107
+
108
+ Returns:
109
+ Markdown string explaining which tokens are set and their uses
110
+ """
111
+ has_personal, has_org = check_token_availability(oauth_token)
112
+
113
+ lines = [
114
+ "You can log in to your Hugging Face account to save your work in a private dataset and use the app for inference after the end of the hackathon.",
115
+ "### Token Status",
116
+ ]
117
+
118
+ # Personal token status
119
+ if has_personal:
120
+ personal_token, status_msg = get_personal_token(oauth_token)
121
+ if oauth_token and oauth_token.token:
122
+ source = "OAuth login"
123
+ else:
124
+ source = ".env file"
125
+ lines.append(f"- **Personal Token**: ✅ Available ({source})")
126
+ lines.append(" - Enables: Inference (fallback), Private dataset saves/loads")
127
+ else:
128
+ lines.append("- **Personal Token**: ❌ Not available")
129
+ lines.append(" - Required for: Private dataset operations")
130
+
131
+ # Org token status
132
+ if has_org:
133
+ org_token = get_org_token()
134
+ # Check if it's from Space secret or .env
135
+ if os.getenv("HACKATHON_INFERENCE_TOKEN"):
136
+ source = "Space secret"
137
+ else:
138
+ source = ".env file"
139
+ lines.append(f"- **Org Token**: ✅ Available ({source})")
140
+ lines.append(" - Enables: Inference (preferred), ROOST dataset saves/loads")
141
+ else:
142
+ lines.append("- **Org Token**: ❌ Not available")
143
+ lines.append(" - Required for: ROOST dataset saves")
144
+ lines.append(" - Note: ROOST dataset can be loaded if public")
145
+
146
+ return "\n".join(lines)
147
+
148
+
149
  def load_preset_policy(preset_name: str, base_dir: str) -> tuple[str, str]:
150
  """Load preset policy from markdown file."""
151
  preset_files = {
utils/model_interface.py CHANGED
@@ -123,7 +123,7 @@ def run_test(
123
  temperature=temperature,
124
  top_p=top_p,
125
  stop=None,
126
- # extra_headers={"X-HF-Bill-To": "roosttools"},
127
  )
128
 
129
  result = {"content": completion.choices[0].message.content}
 
123
  temperature=temperature,
124
  top_p=top_p,
125
  stop=None,
126
+ extra_headers={"X-HF-Bill-To": "roosttools"},
127
  )
128
 
129
  result = {"content": completion.choices[0].message.content}