Spaces:

Cognitive-Lab
/

indic_llm_leaderboard

Running

App Files Files Community

AdithyaSK committed on Apr 2, 2024

Commit

a6ebd86

1 Parent(s): a95af80

added sort by language feature - Adithya S K

Browse files

Files changed (1) hide show

app.py +95 -34

app.py CHANGED Viewed

@@ -8,16 +8,42 @@ import plotly.graph_objs as go
 from huggingface_hub import HfApi
 from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
 from dotenv import load_dotenv
 load_dotenv()
 SERVER_URL = os.getenv("SERVER_URL")
 # Issue an HTTP GET to SERVER_URL and return the response body decoded as JSON.
 # SERVER_URL is read with os.getenv("SERVER_URL"), so it is None when that
 # environment variable is unset.
 def get_data():
     response = requests.get(SERVER_URL)  # no timeout or HTTP status check is applied here
     data = response.json()
     return data
 def main():
     st.set_page_config(page_title="Indic LLM Leaderboard", layout="wide")
@@ -65,10 +91,6 @@ def main():
                 MMLU = item["result"]["MMLU"]["acc_norm"]
             except KeyError:
                 MMLU = None
-            try:
-                Winograde = item["result"]["Winograde"]["acc_norm"]
-            except KeyError:
-                Winograde = None
             try:
                 Translation = item["result"]["Translation"]["acc_norm"]
             except KeyError:
@@ -80,7 +102,7 @@ def main():
             all_models.append(model_name)
             table_data.append({
-                "Model Name": model_name,
                 "Language": language,
                 "Avergae": ALL,
                 "ARC-Easy": ARC_Easy,
@@ -88,60 +110,99 @@ def main():
                 "Hellaswag": Hellaswag,
                 "Boolq": Boolq,
                 "MMLU": MMLU,
-                "Winograde": Winograde,
                 "Translation": Translation,
                 "Generation": Generation
             })
         df = pd.DataFrame(table_data)
-        title = st.text_input('Model Name', placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...")
         col1, col2 = st.columns(2)
         with col1:
             benchmark_options = st.multiselect(
                 'Pick Benchmark',
-                ['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU','Winogrande','Translation','Generation'],['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU'])
         with col2:
             language_options = st.multiselect(
                 'Pick Languages',
                 ['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'],['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'])
-        if title:
-            if ';' in title:
-                model_names = [name.strip() for name in title.split(';')]
-                filtered_df = df[df['Model Name'].isin(model_names)]
-            else:
-                filtered_df = df[df['Model Name'].str.contains(title, case=False, na=False)]
-            filtered_df = filtered_df[filtered_df['Language'].isin(language_options)]
-            filtered_df = filtered_df[df.columns.intersection(['Model Name', 'Language'] + benchmark_options)]
-            # Calculate average across selected benchmark columns
-            filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
-            # Display the filtered DataFrame
-            st.dataframe(filtered_df, use_container_width=True)
-        elif benchmark_options or language_options:
-            filtered_df = df[df['Language'].isin(language_options)]
-            filtered_df = filtered_df[df.columns.intersection(['Model Name', 'Language'] + benchmark_options)]
-            # Calculate average across selected benchmark columns
-            filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
-            st.dataframe(filtered_df, use_container_width=True)
         # Multiselect for comparing models
         compare_models = st.multiselect(
             'Pick Models to compare them',
-            df['Model Name'].unique()
         )
         # Display DataFrame for selected models and their scores
         if compare_models:
             compare_data = []
             for model in compare_models:
-                model_data = df[df['Model Name'] == model]
                 compare_data.append(model_data)
             if compare_data:
                 compare_df = pd.concat(compare_data)

 from huggingface_hub import HfApi
 from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
 from dotenv import load_dotenv
+from huggingface_hub import HfApi
+from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
 load_dotenv()
 SERVER_URL = os.getenv("SERVER_URL")
+@st.cache_data  # result goes through Streamlit's data cache (assumes `st` is streamlit -- import not visible here, confirm)
 def get_data():
     # Issue an HTTP GET to SERVER_URL and return the response body decoded as JSON.
     # SERVER_URL is read with os.getenv("SERVER_URL"), so it is None when that
     # environment variable is unset.
     response = requests.get(SERVER_URL)  # no timeout or HTTP status check is applied here
     data = response.json()
     return data
+@st.cache_data
+def get_model_info(df):
     # Fill a 'Likes' column on df by looking up each row's 'Model' value with
     # HfApi.model_info, then return df. The code assigns into df directly, so
     # the frame that was passed in is the one that gets the new column.
     # One model_info call is made per row, so a model that appears in several
     # rows is looked up once for each of them.
     # NOTE(review): model_info presumably performs a network request to the
     # Hugging Face Hub -- confirm against the huggingface_hub documentation.
+    api = HfApi()
+    # Start every row at None; only 'Likes' is populated (the tag lines below are commented out)
+    df['Likes'] = None
+    # Iterate through DataFrame rows
+    for index, row in df.iterrows():
+        model = row['Model'].strip()  # drop surrounding whitespace before using the value as a repo id
+        try:
+            model_info = api.model_info(repo_id=str(model))
+            df.loc[index, 'Likes'] = f"{model_info.likes}🧡"  # stored as text with the heart suffix, not as a number
+            # df.loc[index, 'Tags'] = ', '.join(model_info.tags)
+        except (RepositoryNotFoundError, RevisionNotFoundError):  # only these two are handled; anything else propagates
+            df.loc[index, 'Likes'] = None
+            # df.loc[index, 'Tags'] = ''
+    return df
+# @st.cache_data
 def main():
     st.set_page_config(page_title="Indic LLM Leaderboard", layout="wide")
                 MMLU = item["result"]["MMLU"]["acc_norm"]
             except KeyError:
                 MMLU = None
             try:
                 Translation = item["result"]["Translation"]["acc_norm"]
             except KeyError:
             all_models.append(model_name)
             table_data.append({
+                "Model": model_name,
                 "Language": language,
                 "Avergae": ALL,
                 "ARC-Easy": ARC_Easy,
                 "Hellaswag": Hellaswag,
                 "Boolq": Boolq,
                 "MMLU": MMLU,
                 "Translation": Translation,
                 "Generation": Generation
             })
         df = pd.DataFrame(table_data)
+        title = st.text_input('Model', placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...")
+        on = st.checkbox('Sort by Language')
         col1, col2 = st.columns(2)
         with col1:
             benchmark_options = st.multiselect(
                 'Pick Benchmark',
+                ['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU','Translation','Generation'],['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq','MMLU'])
         with col2:
             language_options = st.multiselect(
                 'Pick Languages',
                 ['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'],['kannada', 'hindi', 'tamil', 'telegu','gujarathi','marathi','malayalam'])
+        if on:
+            # Loop through each selected language
+            for language in language_options:
+                filtered_df = df[df['Language'] == language]
+                        # Check if the filtered dataframe is not empty
+                if not filtered_df.empty:
+                    st.subheader(f"{language.capitalize()[0]}{language[1:]}")
+                    filtered_df.reset_index(drop=True, inplace=True)
+                    # Display filtered dataframe
+                    filtered_df = get_model_info(filtered_df)
+                    if title:
+                        if ';' in title:
+                            model_names = [name.strip() for name in title.split(';')]
+                            filtered_df = df[df['Model'].isin(model_names)]
+                        else:
+                            filtered_df = df[df['Model'].str.contains(title, case=False, na=False)]
+                        filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
+                        # Calculate average across selected benchmark columns
+                        filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
+                        filtered_df.index += 1
+                        st.dataframe(filtered_df, use_container_width=True)
+                    elif benchmark_options or language_options:
+                        filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
+                        # Calculate average across selected benchmark columns
+                        filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
+                        filtered_df = get_model_info(filtered_df)
+                        filtered_df.index += 1
+                        st.dataframe(filtered_df, use_container_width=True)
+            # st.write('Feature activated!')
+        else:
+            if title:
+                if ';' in title:
+                    model_names = [name.strip() for name in title.split(';')]
+                    filtered_df = df[df['Model'].isin(model_names)]
+                else:
+                    filtered_df = df[df['Model'].str.contains(title, case=False, na=False)]
+                filtered_df = filtered_df[filtered_df['Language'].isin(language_options)]
+                filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
+                # Calculate average across selected benchmark columns
+                filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
+                filtered_df.index += 1
+                # Display the filtered DataFrame
+                st.dataframe(filtered_df, use_container_width=True)
+            elif benchmark_options or language_options:
+                filtered_df = df[df['Language'].isin(language_options)]
+                filtered_df = filtered_df[df.columns.intersection(['Model', 'Language'] + benchmark_options)]
+                # Calculate average across selected benchmark columns
+                filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)
+                filtered_df = get_model_info(filtered_df)
+                filtered_df.index += 1
+                st.dataframe(filtered_df, use_container_width=True)
         # Multiselect for comparing models
         compare_models = st.multiselect(
             'Pick Models to compare them',
+            df['Model'].unique()
         )
         # Display DataFrame for selected models and their scores
         if compare_models:
             compare_data = []
             for model in compare_models:
+                model_data = df[df['Model'] == model]
                 compare_data.append(model_data)
             if compare_data:
                 compare_df = pd.concat(compare_data)