Leaderboard

Running

App Files Files Community

bgamazay committed 8 days ago

Commit

47d601f

verified ·

1 Parent(s): c0062b7

Upload app.py

Browse files

Files changed (1) hide show

app.py +153 -13

app.py CHANGED Viewed

@@ -6,8 +6,8 @@ import base64
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""@misc{aienergyscore-leaderboard,
-    author = {Sasha Luccioni and Boris Gamazaychikov and Emma Strubell and Sara Hooker and Yacine Jernite and Carole-Jean Wu and Margaret Mitchell},
-    title = {AI Energy Score Leaderboard - February 2025},
     year = {2025},
     publisher = {Hugging Face},
     howpublished = "\url{https://huggingface.co/spaces/AIEnergyScore/Leaderboard}",
@@ -18,6 +18,7 @@ tasks = [
     'asr.csv',
     'object_detection.csv',
     'text_classification.csv',
     'image_captioning.csv',
     'question_answering.csv',
     'text_generation.csv',
@@ -27,6 +28,21 @@ tasks = [
     'summarization.csv'
 ]
 ### HELPER FUNCTIONS ###
 def format_stars(score):
@@ -59,12 +75,31 @@ def generate_html_table_from_df(df):
     max_energy = df['gpu_energy_numeric'].max() if not df.empty else 1
     color_map = {"1": "black", "2": "black", "3": "black", "4": "black", "5": "black"}
     html = '<table class="data-table" style="width:100%; border-collapse: collapse; font-family: Inter, sans-serif;">'
     html += '<thead><tr style="background-color: #f2f2f2;">'
     html += '<th style="text-align: left; padding: 8px;" title="Model name with link to Hugging Face">Model</th>'
     html += '<th style="text-align: left; padding: 8px;" title="AI Provider extracted from the model name">Provider</th>'
-    html += '<th style="text-align: left; padding: 8px;" title="GPU energy consumed in Watt-hours for 1,000 queries">GPU Energy (Wh)</th>'
     html += '<th style="text-align: left; padding: 8px;" title="Energy efficiency score (stars)">Score</th>'
     html += '</tr></thead>'
     html += '<tbody>'
     for _, row in df.iterrows():
@@ -79,6 +114,13 @@ def generate_html_table_from_df(df):
         html += (f'<td style="padding: 8px;">{energy_str}<br>'
                  f'<div style="background-color: {bar_color}; width: {bar_width:.1f}%; height: 10px;"></div></td>')
         html += f'<td style="padding: 8px;">{row["Score"]}</td>'
         html += '</tr>'
     html += '</tbody></table>'
     return f'<div class="table-container">{html}</div>'
@@ -87,8 +129,16 @@ def process_df(task, sort_order="Low to High", filter_fn=None):
     df = pd.read_csv(os.path.join("data", "energy", task))
     if df.columns[0].startswith("Unnamed:"):
         df = df.iloc[:, 1:]
-    df['energy_score'] = df['energy_score'].astype(int)
-    df['gpu_energy_numeric'] = pd.to_numeric(df['total_gpu_energy'], errors='raise') * 1000
     if filter_fn is not None:
         df = filter_fn(df)
     df['Provider'] = df['model'].apply(lambda x: str(x).split('/')[0])
@@ -98,19 +148,37 @@ def process_df(task, sort_order="Low to High", filter_fn=None):
     df = df.sort_values(by='gpu_energy_numeric', ascending=ascending)
     return df
 def compute_efficiency_ratio(df):
     if df.empty:
         return 1
     min_val = df['gpu_energy_numeric'].min()
     max_val = df['gpu_energy_numeric'].max()
     ratio = max_val / min_val if min_val > 0 else 1
     return ratio
 def generate_info_callout(ratio, scope_text):
     return (
         f'<div style="text-align: right;">'
         f'<div class="info-callout" style="display:inline-block; max-width:250px; font-size:0.8em; background-color:#e6ffe6; padding:8px; border-radius:5px;">'
-        f'💡 There\'s a <strong style="color: black !important;">{ratio:,.1f}x</strong> difference between the highest and lowest energy use in {scope_text}.'
         f'</div></div>'
     )
@@ -151,7 +219,7 @@ def get_zip_data_link():
 ### UPDATE FUNCTIONS (RETURNING CALLOUT AND TABLE HTML) ###
-def update_text_generation(selected_display, sort_order):
     mapping = {
         "A (Single Consumer GPU) <20B parameters": "A",
         "B (Single Cloud GPU) 20-66B parameters": "B",
@@ -159,18 +227,49 @@ def update_text_generation(selected_display, sort_order):
     }
     model_class = mapping.get(selected_display, "A")
     def filter_fn(df):
         if 'class' in df.columns:
             return df[df['class'] == model_class]
         return df
     df = process_df('text_generation.csv', sort_order, filter_fn)
     ratio = compute_efficiency_ratio(df)
     # For Text Generation, use "this class" as the scope.
     callout = generate_info_callout(ratio, "this class")
     table_html = generate_html_table_from_df(df)
     return callout, table_html
 def update_image_generation(sort_order):
     df = process_df('image_generation.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -178,6 +277,7 @@ def update_image_generation(sort_order):
 def update_text_classification(sort_order):
     df = process_df('text_classification.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -185,6 +285,7 @@ def update_text_classification(sort_order):
 def update_image_classification(sort_order):
     df = process_df('image_classification.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -192,6 +293,7 @@ def update_image_classification(sort_order):
 def update_image_captioning(sort_order):
     df = process_df('image_captioning.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -199,6 +301,7 @@ def update_image_captioning(sort_order):
 def update_summarization(sort_order):
     df = process_df('summarization.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -206,6 +309,7 @@ def update_summarization(sort_order):
 def update_asr(sort_order):
     df = process_df('asr.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -213,6 +317,7 @@ def update_asr(sort_order):
 def update_object_detection(sort_order):
     df = process_df('object_detection.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -220,6 +325,7 @@ def update_object_detection(sort_order):
 def update_sentence_similarity(sort_order):
     df = process_df('sentence_similarity.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -227,6 +333,7 @@ def update_sentence_similarity(sort_order):
 def update_extractive_qa(sort_order):
     df = process_df('question_answering.csv', sort_order)
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
@@ -238,12 +345,18 @@ def update_all_tasks(sort_order):
         df = pd.read_csv(os.path.join("data", "energy", task))
         if df.columns[0].startswith("Unnamed:"):
             df = df.iloc[:, 1:]
-        df['energy_score'] = df['energy_score'].astype(int)
-        df['gpu_energy_numeric'] = pd.to_numeric(df['total_gpu_energy'], errors='raise') * 1000
         df['Provider'] = df['model'].apply(lambda x: str(x).split('/')[0])
         df['Model'] = df['model'].apply(make_link)
         df['Score'] = df['energy_score'].apply(format_stars)
         all_df = pd.concat([all_df, df], ignore_index=True)
     all_df = all_df.drop_duplicates(subset=['model'])
     ascending = True if sort_order == "Low to High" else False
     all_df = all_df.sort_values(by='gpu_energy_numeric', ascending=ascending)
@@ -365,14 +478,41 @@ with demo:
                 with gr.Column(scale=4):
                     sort_dropdown_tg = gr.Dropdown(choices=["Low to High", "High to Low"], label="Sort", value="Low to High")
                 with gr.Column(scale=4):
                     tg_callout = gr.HTML()
             tg_table = gr.HTML()
-            init_callout, init_table = update_text_generation(model_class_options[0], "Low to High")
             tg_callout.value = init_callout
             tg_table.value = init_table
-            model_class_dropdown.change(fn=update_text_generation, inputs=[model_class_dropdown, sort_dropdown_tg], outputs=[tg_callout, tg_table])
-            sort_dropdown_tg.change(fn=update_text_generation, inputs=[model_class_dropdown, sort_dropdown_tg], outputs=[tg_callout, tg_table])
         # --- Image Generation Tab ---
         with gr.TabItem("Image Generation 📷"):
             with gr.Row():
@@ -511,6 +651,6 @@ with demo:
             lines=10,
             show_copy_button=True,
         )
-    gr.Markdown("Last updated: February 2025")
 demo.launch()

 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""@misc{aienergyscore-leaderboard,
+    author = {Sasha Luccioni and Boris Gamazaychikov and Emma Strubell and Sara Hooker and Yacine Jernite and Margaret Mitchell},
+    title = {AI Energy Score Leaderboard - December 2025},
     year = {2025},
     publisher = {Hugging Face},
     howpublished = "\url{https://huggingface.co/spaces/AIEnergyScore/Leaderboard}",
     'asr.csv',
     'object_detection.csv',
     'text_classification.csv',
+    'reasoning.csv',
     'image_captioning.csv',
     'question_answering.csv',
     'text_generation.csv',
     'summarization.csv'
 ]
+# Mapping for display names in "All Tasks"
+TASK_NAME_MAPPING = {
+    'text_generation.csv': 'Text Generation 💬',
+    'reasoning.csv': 'Reasoning 🧠',
+    'image_generation.csv': 'Image Generation 📷',
+    'text_classification.csv': 'Text Classification 🎭',
+    'image_classification.csv': 'Image Classification 🖼️',
+    'image_captioning.csv': 'Image Captioning 📝',
+    'summarization.csv': 'Summarization 📃',
+    'asr.csv': 'Automatic Speech Recognition 💬',
+    'object_detection.csv': 'Object Detection 🚘',
+    'sentence_similarity.csv': 'Sentence Similarity 📚',
+    'question_answering.csv': 'Extractive QA ❔'
+}
 ### HELPER FUNCTIONS ###
 def format_stars(score):
     max_energy = df['gpu_energy_numeric'].max() if not df.empty else 1
     color_map = {"1": "black", "2": "black", "3": "black", "4": "black", "5": "black"}
+    task_name = df.attrs.get("task_name", "")
+    # Check if we should display the 'Task' column (only for All Tasks view)
+    has_task_column = 'Task' in df.columns
+    if task_name not in ["text_generation.csv", "reasoning.csv"]:
+        has_test_date = True
+        df["test date"] = "Feb 25"
+    else:
+        has_test_date = ('test date' in df.columns or 'Test Date' in df.columns)
+        if 'Test Date' in df.columns and 'test date' not in df.columns:
+            df = df.rename(columns={'Test Date':'test date'})
     html = '<table class="data-table" style="width:100%; border-collapse: collapse; font-family: Inter, sans-serif;">'
     html += '<thead><tr style="background-color: #f2f2f2;">'
     html += '<th style="text-align: left; padding: 8px;" title="Model name with link to Hugging Face">Model</th>'
     html += '<th style="text-align: left; padding: 8px;" title="AI Provider extracted from the model name">Provider</th>'
+    html += '<th style="text-align: left; padding: 8px;" title="GPU energy consumed in Watt-hours for 1,000 queries">GPU Energy (Wh) per 1k Queries</th>'
     html += '<th style="text-align: left; padding: 8px;" title="Energy efficiency score (stars)">Score</th>'
+    if has_task_column:
+        html += '<th style="text-align: left; padding: 8px;" title="Task Category">Task</th>'
+    if has_test_date:
+        html += '<th style="text-align: left; padding: 8px;" title="Benchmark test date">Test Date</th>'
     html += '</tr></thead>'
     html += '<tbody>'
     for _, row in df.iterrows():
         html += (f'<td style="padding: 8px;">{energy_str}<br>'
                  f'<div style="background-color: {bar_color}; width: {bar_width:.1f}%; height: 10px;"></div></td>')
         html += f'<td style="padding: 8px;">{row["Score"]}</td>'
+        if has_task_column:
+            html += f'<td style="padding: 8px;">{row["Task"]}</td>'
+        if has_test_date:
+            td = row.get('test date', row.get('Test Date', ''))
+            html += f'<td style="padding: 8px;">{td}</td>'
         html += '</tr>'
     html += '</tbody></table>'
     return f'<div class="table-container">{html}</div>'
     df = pd.read_csv(os.path.join("data", "energy", task))
     if df.columns[0].startswith("Unnamed:"):
         df = df.iloc[:, 1:]
+    df['energy_score'] = pd.to_numeric(df['energy_score'], errors='coerce').fillna(0).clip(lower=0, upper=5).astype(int)
+    # Using raw numbers, no pre-rounding
+    df['gpu_energy_numeric'] = pd.to_numeric(df['total_gpu_energy'], errors='coerce').fillna(0.0) * 1000
+    # normalize test date header if present
+    if 'Test Date' in df.columns and 'test date' not in df.columns:
+        df = df.rename(columns={'Test Date':'test date'})
+    if 'test_date' in df.columns and 'test date' not in df.columns:
+        df = df.rename(columns={'test_date':'test date'})
+    if 'test date' in df.columns:
+        df['test date'] = df['test date'].astype(str).str.strip()
     if filter_fn is not None:
         df = filter_fn(df)
     df['Provider'] = df['model'].apply(lambda x: str(x).split('/')[0])
     df = df.sort_values(by='gpu_energy_numeric', ascending=ascending)
     return df
def get_test_date_choices(task_filename):
    """Return the sorted, distinct test-date labels found in a task CSV.

    Reads ``data/energy/<task_filename>``, drops a leading ``Unnamed:``
    column if present, and accepts ``Test Date`` or ``test_date`` as
    alternative spellings of the ``test date`` header.

    Args:
        task_filename: CSV file name inside ``data/energy``.

    Returns:
        Sorted list of the distinct, whitespace-stripped, non-empty string
        values of the ``test date`` column. ``[]`` when the file cannot be
        read or parsed, or when it has no such column.
    """
    csv_path = os.path.join("data", "energy", task_filename)
    try:
        df = pd.read_csv(csv_path)
    except (OSError, ValueError):
        # Best effort for a missing, unreadable, empty or malformed file.
        # pandas' EmptyDataError and ParserError are ValueError subclasses.
        # Anything else is a programming error and is allowed to surface.
        return []

    if df.columns[0].startswith("Unnamed:"):
        df = df.iloc[:, 1:]

    # Normalize the header spelling; the first alias that matches wins.
    for alias in ('Test Date', 'test_date'):
        if alias in df.columns and 'test date' not in df.columns:
            df = df.rename(columns={alias: 'test date'})

    if 'test date' not in df.columns:
        return []

    labels = df['test date'].astype(str).str.strip().unique().tolist()
    return sorted(label for label in labels if label)
 def compute_efficiency_ratio(df):
     """Return how many times larger the highest GPU energy is than the lowest.

     The ratio is taken over the strictly positive, unrounded values of the
     ``gpu_energy_numeric`` column. Zero entries are skipped because they
     cannot serve as a divisor; energy values that fail numeric parsing are
     stored as 0 when this column is built, so a single such row would
     otherwise collapse the result to 1.

     Args:
         df: DataFrame with a numeric ``gpu_energy_numeric`` column.

     Returns:
         ``max / min`` of the positive values, or ``1`` when the frame is
         empty or holds no positive value.
     """
     if df.empty:
         return 1
     energy = df['gpu_energy_numeric']
     # NaN compares False here, so missing values are excluded as well.
     measured = energy[energy > 0]
     if measured.empty:
         return 1
     return measured.max() / measured.min()
 def generate_info_callout(ratio, scope_text):
     """Build the right-aligned HTML callout describing the energy spread.

     Args:
         ratio: Highest-to-lowest energy ratio; shown with thousands
             separators and rounded to no decimals.
         scope_text: Phrase naming the compared group (callers in this file
             pass "this task" or "this class"); inserted verbatim.

     Returns:
         HTML string: an outer right-aligned ``div`` wrapping the
         ``info-callout`` box that holds the sentence.
     """
     ratio_label = f'{ratio:,.0f}x'
     outer_open = '<div style="text-align: right;">'
     box_open = '<div class="info-callout" style="display:inline-block; max-width:250px; font-size:0.8em; background-color:#e6ffe6; padding:8px; border-radius:5px;">'
     sentence = (
         f'💡 There\'s a <strong style="color: black !important;">{ratio_label}</strong>'
         f' difference between the highest and lowest energy use in {scope_text}.'
     )
     closing = '</div></div>'
     return ''.join((outer_open, box_open, sentence, closing))
 ### UPDATE FUNCTIONS (RETURNING CALLOUT AND TABLE HTML) ###
+def update_text_generation(selected_display, sort_order, selected_dates):
     mapping = {
         "A (Single Consumer GPU) <20B parameters": "A",
         "B (Single Cloud GPU) 20-66B parameters": "B",
     }
     model_class = mapping.get(selected_display, "A")
     def filter_fn(df):
+        # filter by selected test dates as well
+        if 'Test Date' in df.columns and 'test date' not in df.columns:
+            df.rename(columns={'Test Date':'test date'}, inplace=True)
+        if 'test_date' in df.columns and 'test date' not in df.columns:
+            df.rename(columns={'test_date':'test date'}, inplace=True)
+        if selected_dates:
+            df = df[df['test date'].astype(str).isin(selected_dates)]
         if 'class' in df.columns:
             return df[df['class'] == model_class]
         return df
     df = process_df('text_generation.csv', sort_order, filter_fn)
+    df.attrs["task_name"] = "text_generation.csv"
     ratio = compute_efficiency_ratio(df)
     # For Text Generation, use "this class" as the scope.
     callout = generate_info_callout(ratio, "this class")
     table_html = generate_html_table_from_df(df)
     return callout, table_html
def update_reasoning(selected_display, sort_order):
    """Build the callout and table HTML for the Reasoning tab.

    Args:
        selected_display: Model-class label chosen in the dropdown; an
            unrecognized label falls back to class "A".
        sort_order: Sort choice forwarded to ``process_df``.

    Returns:
        Tuple ``(callout_html, table_html)``.
    """
    class_by_label = {
        "A (Single Consumer GPU) <20B parameters": "A",
        "B (Single Cloud GPU) 20-66B parameters": "B",
        "C (Multiple Cloud GPUs) >66B parameters": "C",
    }
    model_class = class_by_label.get(selected_display, "A")

    def filter_fn(df):
        # Reasoning is filtered by class only; there is no test-date filter.
        if 'class' not in df.columns:
            return df
        return df[df['class'] == model_class]

    ranked = process_df('reasoning.csv', sort_order, filter_fn)
    # The table renderer reads this attribute to identify the task.
    ranked.attrs["task_name"] = "reasoning.csv"
    callout = generate_info_callout(compute_efficiency_ratio(ranked), "this class")
    return callout, generate_html_table_from_df(ranked)
 def update_image_generation(sort_order):
     df = process_df('image_generation.csv', sort_order)
+    df.attrs["task_name"] = 'image_generation.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_text_classification(sort_order):
     df = process_df('text_classification.csv', sort_order)
+    df.attrs["task_name"] = 'text_classification.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_image_classification(sort_order):
     df = process_df('image_classification.csv', sort_order)
+    df.attrs["task_name"] = 'image_classification.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_image_captioning(sort_order):
     df = process_df('image_captioning.csv', sort_order)
+    df.attrs["task_name"] = 'image_captioning.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_summarization(sort_order):
     df = process_df('summarization.csv', sort_order)
+    df.attrs["task_name"] = 'summarization.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_asr(sort_order):
     df = process_df('asr.csv', sort_order)
+    df.attrs["task_name"] = 'asr.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_object_detection(sort_order):
     df = process_df('object_detection.csv', sort_order)
+    df.attrs["task_name"] = 'object_detection.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_sentence_similarity(sort_order):
     df = process_df('sentence_similarity.csv', sort_order)
+    df.attrs["task_name"] = 'sentence_similarity.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
 def update_extractive_qa(sort_order):
     df = process_df('question_answering.csv', sort_order)
+    df.attrs["task_name"] = 'question_answering.csv'
     ratio = compute_efficiency_ratio(df)
     callout = generate_info_callout(ratio, "this task")
     table_html = generate_html_table_from_df(df)
         df = pd.read_csv(os.path.join("data", "energy", task))
         if df.columns[0].startswith("Unnamed:"):
             df = df.iloc[:, 1:]
+        df['energy_score'] = pd.to_numeric(df['energy_score'], errors='coerce').fillna(0).clip(lower=0, upper=5).astype(int)
+        df['gpu_energy_numeric'] = pd.to_numeric(df['total_gpu_energy'], errors='coerce').fillna(0.0) * 1000
         df['Provider'] = df['model'].apply(lambda x: str(x).split('/')[0])
         df['Model'] = df['model'].apply(make_link)
         df['Score'] = df['energy_score'].apply(format_stars)
+        # Add Task column with emoji
+        df['Task'] = TASK_NAME_MAPPING.get(task, task)
         all_df = pd.concat([all_df, df], ignore_index=True)
     all_df = all_df.drop_duplicates(subset=['model'])
     ascending = True if sort_order == "Low to High" else False
     all_df = all_df.sort_values(by='gpu_energy_numeric', ascending=ascending)
                 with gr.Column(scale=4):
                     sort_dropdown_tg = gr.Dropdown(choices=["Low to High", "High to Low"], label="Sort", value="Low to High")
                 with gr.Column(scale=4):
+                    tg_date_choices = get_test_date_choices("text_generation.csv")
+                    date_dropdown_tg = gr.Dropdown(choices=tg_date_choices, value=tg_date_choices, multiselect=True, label="Test Date")
+                with gr.Column(scale=3):
                     tg_callout = gr.HTML()
             tg_table = gr.HTML()
+            init_callout, init_table = update_text_generation(model_class_options[0], "Low to High", get_test_date_choices("text_generation.csv"))
             tg_callout.value = init_callout
             tg_table.value = init_table
+            model_class_dropdown.change(fn=update_text_generation, inputs=[model_class_dropdown, sort_dropdown_tg, date_dropdown_tg], outputs=[tg_callout, tg_table])
+            sort_dropdown_tg.change(fn=update_text_generation, inputs=[model_class_dropdown, sort_dropdown_tg, date_dropdown_tg], outputs=[tg_callout, tg_table])
+            date_dropdown_tg.change(fn=update_text_generation, inputs=[model_class_dropdown, sort_dropdown_tg, date_dropdown_tg], outputs=[tg_callout, tg_table])
+        # --- Reasoning Tab ---
+        with gr.TabItem("Reasoning 🧠"):
+            with gr.Row():
+                with gr.Column(scale=4):
+                    model_class_options = [
+                        "A (Single Consumer GPU) <20B parameters",
+                        "B (Single Cloud GPU) 20-66B parameters",
+                        "C (Multiple Cloud GPUs) >66B parameters"
+                    ]
+                    rs_class_dropdown = gr.Dropdown(choices=model_class_options, value=model_class_options[0], label="Select Model Class")
+                with gr.Column(scale=4):
+                    rs_sort_dropdown = gr.Dropdown(choices=["Low to High", "High to Low"], label="Sort", value="Low to High")
+                with gr.Column(scale=4):
+                    rs_callout = gr.HTML()
+            rs_table = gr.HTML()
+            init_callout, init_table = update_reasoning(model_class_options[0], "Low to High")
+            rs_callout.value = init_callout
+            rs_table.value = init_table
+            rs_class_dropdown.change(fn=update_reasoning, inputs=[rs_class_dropdown, rs_sort_dropdown], outputs=[rs_callout, rs_table])
+            rs_sort_dropdown.change(fn=update_reasoning, inputs=[rs_class_dropdown, rs_sort_dropdown], outputs=[rs_callout, rs_table])
         # --- Image Generation Tab ---
         with gr.TabItem("Image Generation 📷"):
             with gr.Row():
             lines=10,
             show_copy_button=True,
         )
+    gr.Markdown("Last updated: December 2025")
 demo.launch()