Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +9 -0
src/streamlit_app.py
CHANGED
|
@@ -66,6 +66,7 @@ score_cols = [f"T{i}" for i in range(1, 12)] + ["Avg"]
|
|
| 66 |
max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
|
| 67 |
|
| 68 |
# one page description
|
|
|
|
| 69 |
# st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
|
| 70 |
# Build raw HTML table
|
| 71 |
cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
|
|
@@ -154,6 +155,14 @@ Let’s build better evaluations for expert-level AI — together ππ€
|
|
| 154 |
"""
|
| 155 |
)
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
# # ─── Tabs ──────────────────────────────────────────────────────────────────────
|
| 159 |
# tab1, tab2, tab3, tab4 = st.tabs(["π Leaderboard", "π Benchmark Details", "π€ Submit Your Model", "🧩 Community Contributions Welcome"])
|
|
|
|
| 66 |
max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
|
| 67 |
|
| 68 |
# one page description
|
| 69 |
+
st.markdown("## Leaderboard")
|
| 70 |
# st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
|
| 71 |
# Build raw HTML table
|
| 72 |
cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
|
|
|
|
| 155 |
"""
|
| 156 |
)
|
| 157 |
|
| 158 |
+
## π Citation
|
| 159 |
+
```bibtex
|
| 160 |
+
@article{ruan2025expertlongbench,
|
| 161 |
+
title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
|
| 162 |
+
author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
|
| 163 |
+
journal={arXiv preprint arXiv:2506.01241},
|
| 164 |
+
year={2025}
|
| 165 |
+
}
|
| 166 |
|
| 167 |
# # ─── Tabs ──────────────────────────────────────────────────────────────────────
|
| 168 |
# tab1, tab2, tab3, tab4 = st.tabs(["π Leaderboard", "π Benchmark Details", "π€ Submit Your Model", "🧩 Community Contributions Welcome"])
|