Clémentine committed on
Commit
d7de3ad
·
1 Parent(s): 8c2b68f

added about

Browse files
Files changed (1) hide show
  1. app.py +103 -78
app.py CHANGED
@@ -24,88 +24,113 @@ def daily_checkpoint() -> None:
24
  # Create Gradio interface
25
  def create_app() -> gr.Blocks:
26
  with gr.Blocks(title="Inference Provider Testing Dashboard") as demo:
27
- gr.Markdown("# Inference Provider Testing Dashboard")
28
- gr.Markdown("Launch and monitor evaluation jobs for multiple models and providers.")
29
-
30
- # All action buttons in one row
31
- with gr.Row():
32
- init_btn = gr.Button("Fetch and Initialize Models/Providers", variant="secondary")
33
- launch_btn = gr.Button("Launch All Jobs", variant="primary")
34
- relaunch_failed_btn = gr.Button("Relaunch Failed", variant="stop")
35
- refresh_btn = gr.Button("Refresh Results", variant="secondary")
36
-
37
- output = gr.Textbox(label="Status", interactive=False)
38
-
39
- # Accordion for viewing models/providers list
40
- with gr.Accordion("Models/Providers Configuration", open=False):
41
- models_providers_display = gr.Code(
42
- label="Current Models and Providers",
43
- value=load_models_providers_str(),
44
- interactive=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  )
46
 
47
- with gr.Row():
48
- with gr.Column():
49
- gr.Markdown("## Job Results")
50
- results_table = gr.Dataframe(
51
- value=get_results_table(),
52
- interactive=True,
53
- show_search="search",
54
- show_copy_button=True,
55
- show_fullscreen_button=True,
56
- wrap=True,
57
- static_columns=list(range(7)),
58
- datatype=["str", "str", "str", "str", "str", "str", "html", "str"],
59
- elem_id="results_table"
60
- )
61
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Event handlers
64
- init_btn.click(
65
- fn=initialize_models_providers_file,
66
- outputs=[output, models_providers_display]
67
- )
68
-
69
- launch_btn.click(
70
- fn=launch_jobs,
71
- outputs=output
72
- )
73
-
74
- relaunch_failed_btn.click(
75
- fn=relaunch_failed_jobs,
76
- outputs=output
77
- )
78
-
79
- refresh_btn.click(
80
- fn=get_results_table,
81
- outputs=results_table
82
- )
83
-
84
- # Handle dataframe cell selection for relaunch
85
- def handle_table_select(evt: gr.SelectData):
86
- """Handle when a cell in the results table is clicked."""
87
- print(f"[Relaunch] Cell selected - Row: {evt.index[0]}, Col: {evt.index[1]}, Value: {evt.value}")
88
-
89
- # If we selected a "rerun" cell, we relaunch a job
90
- if evt.index[1] == 7:
91
- # Get the full row data from the dataframe
92
- df = get_results_table()
93
- row_data = df.data.iloc[evt.index[0]]
94
-
95
- model = row_data['Model']
96
- provider = row_data['Provider']
97
- print(f"[Relaunch] Relaunching job - Model: {model}, Provider: {provider}")
98
-
99
- run_single_job(model, provider, globals.TASKS)
100
-
101
- # Then update the table
102
- return get_results_table()
103
-
104
- results_table.select(
105
- fn=handle_table_select,
106
- inputs=[],
107
- outputs=results_table
108
- )
 
 
 
109
 
110
  return demo
111
 
 
24
  # Create Gradio interface
25
  def create_app() -> gr.Blocks:
26
  with gr.Blocks(title="Inference Provider Testing Dashboard") as demo:
27
+ with gr.Tab("Main"):
28
+ gr.Markdown("# Inference Provider Testing Dashboard")
29
+ gr.Markdown("Launch and monitor evaluation jobs for multiple models and providers.")
30
+
31
+ # All action buttons in one row
32
+ with gr.Row():
33
+ init_btn = gr.Button("Fetch and Initialize Models/Providers", variant="secondary")
34
+ launch_btn = gr.Button("Launch All Jobs", variant="primary")
35
+ relaunch_failed_btn = gr.Button("Relaunch Failed", variant="stop")
36
+ refresh_btn = gr.Button("Refresh Results", variant="secondary")
37
+
38
+ output = gr.Textbox(label="Status", interactive=False)
39
+
40
+ # Accordion for viewing models/providers list
41
+ with gr.Accordion("Models/Providers Configuration", open=False):
42
+ models_providers_display = gr.Code(
43
+ label="Current Models and Providers",
44
+ value=load_models_providers_str(),
45
+ interactive=False,
46
+ )
47
+
48
+ with gr.Row():
49
+ with gr.Column():
50
+ gr.Markdown("## Job Results")
51
+ results_table = gr.Dataframe(
52
+ value=get_results_table(),
53
+ interactive=True,
54
+ show_search="search",
55
+ show_copy_button=True,
56
+ show_fullscreen_button=True,
57
+ wrap=True,
58
+ static_columns=list(range(7)),
59
+ datatype=["str", "str", "str", "str", "str", "str", "html", "str"],
60
+ elem_id="results_table"
61
+ )
62
+
63
+
64
+ # Event handlers
65
+ init_btn.click(
66
+ fn=initialize_models_providers_file,
67
+ outputs=[output, models_providers_display]
68
  )
69
 
70
+ launch_btn.click(
71
+ fn=launch_jobs,
72
+ outputs=output
73
+ )
 
 
 
 
 
 
 
 
 
 
74
 
75
+ relaunch_failed_btn.click(
76
+ fn=relaunch_failed_jobs,
77
+ outputs=output
78
+ )
79
+
80
+ refresh_btn.click(
81
+ fn=get_results_table,
82
+ outputs=results_table
83
+ )
84
 
85
+ # Handle dataframe cell selection for relaunch
86
+ def handle_table_select(evt: gr.SelectData):
87
+ """Handle when a cell in the results table is clicked."""
88
+ print(f"[Relaunch] Cell selected - Row: {evt.index[0]}, Col: {evt.index[1]}, Value: {evt.value}")
89
+
90
+ # If we selected a "rerun" cell, we relaunch a job
91
+ if evt.index[1] == 7:
92
+ # Get the full row data from the dataframe
93
+ df = get_results_table()
94
+ row_data = df.data.iloc[evt.index[0]]
95
+
96
+ model = row_data['Model']
97
+ provider = row_data['Provider']
98
+ print(f"[Relaunch] Relaunching job - Model: {model}, Provider: {provider}")
99
+
100
+ run_single_job(model, provider, globals.TASKS)
101
+
102
+ # Then update the table
103
+ return get_results_table()
104
+
105
+ results_table.select(
106
+ fn=handle_table_select,
107
+ inputs=[],
108
+ outputs=results_table
109
+ )
110
+ with gr.Tab("About"):
111
+ gr.Markdown("""
112
+ In this demo, we run 10 samples for ifeval (instruction following), gsm_plus (grade school math problems, less contaminated than gsm8k) and gpqa, diamond subset (knowledge),
113
+ for all models and providers combinations.
114
+
115
+ To run any of these locally, you can use the following
116
+ ```python
117
+ from huggingface_hub import run_job, inspect_job, whoami
118
+ job = run_job(
119
+ image="hf.co/spaces/OpenEvals/EvalsOnTheHub",
120
+ command=[
121
+ "lighteval", "endpoint", "inference-providers",
122
+ "model_name=MODEL,provider=PROVIDER",
123
+ "extended|ifeval|0,lighteval|gsm_plus|0,lighteval|gpqa:diamond|0",
124
+ "--max-samples", "10",
125
+ "--push-to-hub", "--save-details",
126
+ "--results-org", "YOURORG"
127
+ ],
128
+ namespace="huggingface",
129
+ secrets={"HF_TOKEN": YOURTOKEN},
130
+ token=YOURTOKEN
131
+ )
132
+ ```
133
+ """)
134
 
135
  return demo
136