nmmursit committed on
Commit
1326dcc
·
verified ·
1 Parent(s): 8c404fc

Fixed text wrapping in tables & Added Contact Info

Browse files
Files changed (5) hide show
  1. app.py +3 -5
  2. src/.DS_Store +0 -0
  3. src/display/about.py +2 -0
  4. src/display/css_html_js.py +7 -4
  5. src/utils.py +27 -0
app.py CHANGED
@@ -14,13 +14,11 @@ logging.basicConfig(
14
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
15
  )
16
 
17
- # Disable all potentially sensitive loggers immediately
18
  logging.getLogger("httpx").setLevel(logging.ERROR)
19
  logging.getLogger("urllib3").setLevel(logging.ERROR)
20
  logging.getLogger("matplotlib").setLevel(logging.WARNING)
21
  logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
22
 
23
- # Minimize the OAuth imports to prevent errors
24
  from gradio.oauth import OAuthProfile
25
 
26
  from src.display.about import (
@@ -214,7 +212,7 @@ def create_demo():
214
  value=leaderboard_df,
215
  label="Model Performance Comparison",
216
  interactive=False,
217
- column_widths=["300px", "165px" ,"165px", "120px", "120px", "180px", "220px", "100px", "100px", "120px"]
218
 
219
  )
220
 
@@ -251,7 +249,7 @@ def create_demo():
251
  value=human_arena_df,
252
  label="Human Arena Results",
253
  interactive=False,
254
- column_widths=["300px", "150px", "110px", "110px", "110px", "156px", "169px", "100px", "120px"]
255
 
256
  )
257
 
@@ -267,7 +265,7 @@ def create_demo():
267
  value=rag_details_df,
268
  label="Retrieval Detailed Results",
269
  interactive=False,
270
- column_widths=["280px", "120px", "140px", "140px", "140px", "120px", "160px", "100px", "120px"]
271
 
272
  )
273
 
 
14
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
15
  )
16
 
 
17
  logging.getLogger("httpx").setLevel(logging.ERROR)
18
  logging.getLogger("urllib3").setLevel(logging.ERROR)
19
  logging.getLogger("matplotlib").setLevel(logging.WARNING)
20
  logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
21
 
 
22
  from gradio.oauth import OAuthProfile
23
 
24
  from src.display.about import (
 
212
  value=leaderboard_df,
213
  label="Model Performance Comparison",
214
  interactive=False,
215
+ column_widths=["300px", "180px" ,"180px", "150px", "150px", "180px", "220px", "100px", "100px", "120px"]
216
 
217
  )
218
 
 
249
  value=human_arena_df,
250
  label="Human Arena Results",
251
  interactive=False,
252
+ column_widths=["300px", "180px", "110px", "110px", "110px", "156px", "169px", "100px", "120px"]
253
 
254
  )
255
 
 
265
  value=rag_details_df,
266
  label="Retrieval Detailed Results",
267
  interactive=False,
268
+ column_widths=["300px", "120px", "140px", "140px", "140px", "120px", "160px", "100px", "120px"]
269
 
270
  )
271
 
src/.DS_Store CHANGED
Binary files a/src/.DS_Store and b/src/.DS_Store differ
 
src/display/about.py CHANGED
@@ -60,6 +60,8 @@ Evaluate your model's performance in the following categories:
60
  7. 🧩 **Structured Outputs** - Coming soon!
61
 
62
  Evaluate your model in any or all of these categories to discover its capabilities and areas of excellence.
 
 
63
  """
64
 
65
  # Detailed explanation of benchmarks and reproduction steps
 
60
  7. 🧩 **Structured Outputs** - Coming soon!
61
 
62
  Evaluate your model in any or all of these categories to discover its capabilities and areas of excellence.
63
+
64
+ For any questions, please contact us at info@newmind.ai
65
  """
66
 
67
  # Detailed explanation of benchmarks and reproduction steps
src/display/css_html_js.py CHANGED
@@ -44,12 +44,15 @@ custom_css = """
44
  display: none;
45
  }
46
 
47
- /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
48
  table td:first-child,
49
  table th:first-child {
50
- max-width: 400px;
51
- overflow: auto;
52
- white-space: nowrap;
 
 
 
53
  }
54
 
55
  .tab-buttons button {
 
44
  display: none;
45
  }
46
 
47
+ /* Allow model names to display fully with text wrapping */
48
  table td:first-child,
49
  table th:first-child {
50
+ min-width: 250px;
51
+ max-width: 500px;
52
+ overflow: visible;
53
+ white-space: normal;
54
+ word-wrap: break-word;
55
+ line-height: 1.4;
56
  }
57
 
58
  .tab-buttons button {
src/utils.py CHANGED
@@ -917,6 +917,33 @@ def create_raw_details_table(benchmark_data, benchmark_type):
917
  # Create DataFrame
918
  df = pd.DataFrame(flattened_data)
919
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
  # Ensure model_name is first column
921
  if "model_name" in df.columns:
922
  cols = ["model_name"] + [col for col in df.columns if col != "model_name"]
 
917
  # Create DataFrame
918
  df = pd.DataFrame(flattened_data)
919
 
920
+ # Format confidence interval for arena data
921
+ if benchmark_type == "arena" and "95%(CI)" in df.columns:
922
+ def format_confidence_interval(ci_value):
923
+ """Convert '-1.65/+2.66' to '+2.66/-1.65' format"""
924
+ if isinstance(ci_value, str) and "/" in ci_value:
925
+ parts = ci_value.split("/")
926
+ if len(parts) == 2:
927
+ negative_part = parts[0].strip()
928
+ positive_part = parts[1].strip()
929
+
930
+ # Remove the signs and get the numbers
931
+ if negative_part.startswith("-"):
932
+ negative_num = negative_part[1:]
933
+ else:
934
+ negative_num = negative_part
935
+
936
+ if positive_part.startswith("+"):
937
+ positive_num = positive_part[1:]
938
+ else:
939
+ positive_num = positive_part
940
+
941
+ # Return in +positive/-negative format
942
+ return f"+{positive_num}/-{negative_num}"
943
+ return ci_value
944
+
945
+ df["95%(CI)"] = df["95%(CI)"].apply(format_confidence_interval)
946
+
947
  # Ensure model_name is first column
948
  if "model_name" in df.columns:
949
  cols = ["model_name"] + [col for col in df.columns if col != "model_name"]