Fixed text wrapping in tables & Added Contact Info
Browse files
- app.py +3 -5
- src/.DS_Store +0 -0
- src/display/about.py +2 -0
- src/display/css_html_js.py +7 -4
- src/utils.py +27 -0
app.py
CHANGED
|
@@ -14,13 +14,11 @@ logging.basicConfig(
|
|
| 14 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 15 |
)
|
| 16 |
|
| 17 |
-
# Disable all potentially sensitive loggers immediately
|
| 18 |
logging.getLogger("httpx").setLevel(logging.ERROR)
|
| 19 |
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
| 20 |
logging.getLogger("matplotlib").setLevel(logging.WARNING)
|
| 21 |
logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
|
| 22 |
|
| 23 |
-
# Minimize the OAuth imports to prevent errors
|
| 24 |
from gradio.oauth import OAuthProfile
|
| 25 |
|
| 26 |
from src.display.about import (
|
|
@@ -214,7 +212,7 @@ def create_demo():
|
|
| 214 |
value=leaderboard_df,
|
| 215 |
label="Model Performance Comparison",
|
| 216 |
interactive=False,
|
| 217 |
-
column_widths=["300px", "
|
| 218 |
|
| 219 |
)
|
| 220 |
|
|
@@ -251,7 +249,7 @@ def create_demo():
|
|
| 251 |
value=human_arena_df,
|
| 252 |
label="Human Arena Results",
|
| 253 |
interactive=False,
|
| 254 |
-
column_widths=["300px", "
|
| 255 |
|
| 256 |
)
|
| 257 |
|
|
@@ -267,7 +265,7 @@ def create_demo():
|
|
| 267 |
value=rag_details_df,
|
| 268 |
label="Retrieval Detailed Results",
|
| 269 |
interactive=False,
|
| 270 |
-
column_widths=["
|
| 271 |
|
| 272 |
)
|
| 273 |
|
|
|
|
| 14 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 15 |
)
|
| 16 |
|
|
|
|
| 17 |
logging.getLogger("httpx").setLevel(logging.ERROR)
|
| 18 |
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
| 19 |
logging.getLogger("matplotlib").setLevel(logging.WARNING)
|
| 20 |
logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
|
| 21 |
|
|
|
|
| 22 |
from gradio.oauth import OAuthProfile
|
| 23 |
|
| 24 |
from src.display.about import (
|
|
|
|
| 212 |
value=leaderboard_df,
|
| 213 |
label="Model Performance Comparison",
|
| 214 |
interactive=False,
|
| 215 |
+
column_widths=["300px", "180px" ,"180px", "150px", "150px", "180px", "220px", "100px", "100px", "120px"]
|
| 216 |
|
| 217 |
)
|
| 218 |
|
|
|
|
| 249 |
value=human_arena_df,
|
| 250 |
label="Human Arena Results",
|
| 251 |
interactive=False,
|
| 252 |
+
column_widths=["300px", "180px", "110px", "110px", "110px", "156px", "169px", "100px", "120px"]
|
| 253 |
|
| 254 |
)
|
| 255 |
|
|
|
|
| 265 |
value=rag_details_df,
|
| 266 |
label="Retrieval Detailed Results",
|
| 267 |
interactive=False,
|
| 268 |
+
column_widths=["300px", "120px", "140px", "140px", "140px", "120px", "160px", "100px", "120px"]
|
| 269 |
|
| 270 |
)
|
| 271 |
|
src/.DS_Store
CHANGED
|
Binary files a/src/.DS_Store and b/src/.DS_Store differ
|
|
|
src/display/about.py
CHANGED
|
@@ -60,6 +60,8 @@ Evaluate your model's performance in the following categories:
|
|
| 60 |
7. 🧩 **Structured Outputs** - Coming soon!
|
| 61 |
|
| 62 |
Evaluate your model in any or all of these categories to discover its capabilities and areas of excellence.
|
|
|
|
|
|
|
| 63 |
"""
|
| 64 |
|
| 65 |
# Detailed explanation of benchmarks and reproduction steps
|
|
|
|
| 60 |
7. 🧩 **Structured Outputs** - Coming soon!
|
| 61 |
|
| 62 |
Evaluate your model in any or all of these categories to discover its capabilities and areas of excellence.
|
| 63 |
+
|
| 64 |
+
For any questions, please contact us at info@newmind.ai
|
| 65 |
"""
|
| 66 |
|
| 67 |
# Detailed explanation of benchmarks and reproduction steps
|
src/display/css_html_js.py
CHANGED
|
@@ -44,12 +44,15 @@ custom_css = """
|
|
| 44 |
display: none;
|
| 45 |
}
|
| 46 |
|
| 47 |
-
/*
|
| 48 |
table td:first-child,
|
| 49 |
table th:first-child {
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
|
| 55 |
.tab-buttons button {
|
|
|
|
| 44 |
display: none;
|
| 45 |
}
|
| 46 |
|
| 47 |
+
/* Allow model names to display fully with text wrapping */
|
| 48 |
table td:first-child,
|
| 49 |
table th:first-child {
|
| 50 |
+
min-width: 250px;
|
| 51 |
+
max-width: 500px;
|
| 52 |
+
overflow: visible;
|
| 53 |
+
white-space: normal;
|
| 54 |
+
word-wrap: break-word;
|
| 55 |
+
line-height: 1.4;
|
| 56 |
}
|
| 57 |
|
| 58 |
.tab-buttons button {
|
src/utils.py
CHANGED
|
@@ -917,6 +917,33 @@ def create_raw_details_table(benchmark_data, benchmark_type):
|
|
| 917 |
# Create DataFrame
|
| 918 |
df = pd.DataFrame(flattened_data)
|
| 919 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 920 |
# Ensure model_name is first column
|
| 921 |
if "model_name" in df.columns:
|
| 922 |
cols = ["model_name"] + [col for col in df.columns if col != "model_name"]
|
|
|
|
| 917 |
# Create DataFrame
|
| 918 |
df = pd.DataFrame(flattened_data)
|
| 919 |
|
| 920 |
+
# Format confidence interval for arena data
|
| 921 |
+
if benchmark_type == "arena" and "95%(CI)" in df.columns:
|
| 922 |
+
def format_confidence_interval(ci_value):
    """Normalize a confidence interval string to '+upper/-lower' form.

    Converts '-1.65/+2.66' (or the unsigned '1.65/2.66') to '+2.66/-1.65'.
    A value that is already in '+2.66/-1.65' order is returned in that same
    normalized form, so applying the function twice is harmless.

    Args:
        ci_value: The cell value to format. Only strings made of exactly two
            numeric parts separated by "/" are rewritten.

    Returns:
        The reformatted string, or ``ci_value`` unchanged when it is not a
        string, does not split into exactly two parts, or either part is not
        a plain number (e.g. 'N/A', or two parts carrying the same sign).
    """
    if not isinstance(ci_value, str):
        return ci_value

    parts = [part.strip() for part in ci_value.split("/")]
    if len(parts) != 2:
        return ci_value

    lower, upper = parts
    # The signs say the bounds are already in '+upper/-lower' order; swap them
    # so `lower` holds the negative bound instead of mangling the signs.
    if lower.startswith("+") or upper.startswith("-"):
        lower, upper = upper, lower

    # Drop the expected sign from each bound to get the bare magnitudes.
    negative_num = lower[1:] if lower.startswith("-") else lower
    positive_num = upper[1:] if upper.startswith("+") else upper

    # A sign left over at this point means the two parts contradicted each
    # other (e.g. '-1/-2'); leave such values alone rather than emit '+-1'.
    if negative_num.startswith(("+", "-")) or positive_num.startswith(("+", "-")):
        return ci_value

    # Only rewrite genuine numbers so placeholders such as 'N/A' survive.
    try:
        float(negative_num)
        float(positive_num)
    except ValueError:
        return ci_value

    # Return in +positive/-negative format
    return f"+{positive_num}/-{negative_num}"
|
| 944 |
+
|
| 945 |
+
df["95%(CI)"] = df["95%(CI)"].apply(format_confidence_interval)
|
| 946 |
+
|
| 947 |
# Ensure model_name is first column
|
| 948 |
if "model_name" in df.columns:
|
| 949 |
cols = ["model_name"] + [col for col in df.columns if col != "model_name"]
|