"""
Mizan Leaderboard - Enhanced Version with Submit Functionality

Includes leaderboard display, model submission, and evaluation tracking
"""

import gradio as gr

from ui_components import (
    create_leaderboard_tab, create_dataset_tab, create_submit_evaluation_tab
)
from data_processor import load_leaderboard_from_csv
from evaluation_service import submit_evaluation

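# Module-level cache of the leaderboard data; loaded from CSV in create_leaderboard_demo()
# and refreshed after each submitted evaluation.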
current_data = None


def create_leaderboard_demo():
    """Create the enhanced leaderboard demo interface with submit functionality."""
    global current_data

    current_data = load_leaderboard_from_csv()

    with gr.Blocks(
        title="Mizan",
        theme=gr.themes.Soft()
    ) as demo:
        gr.Markdown("""
# Mizan Leaderboard

Performance comparison for Turkish embedding models
""")

        with gr.Tabs():
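            # Three tabs: leaderboard display, model submission, and dataset / metric documentation.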
            with gr.Tab("Leaderboard"):
                leaderboard_table = create_leaderboard_tab(current_data)

            with gr.Tab("Submit"):
                (model_input, email_input, submit_btn, login_button, result_output) = create_submit_evaluation_tab()

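                # Submission handler: requires a Hugging Face sign-in and a contact email
                # before forwarding the request to evaluation_service.submit_evaluation.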
                def handle_submit_evaluation(model_name, email, profile, progress=gr.Progress()):
                    import logging

                    if profile is None:
                        logging.warning("Unauthorized submission attempt with no profile")
                        return "<p style='color: red; font-weight: bold;'>Authentication required. Please log in with your Hugging Face account.</p>"

                    if isinstance(profile, str) and profile == "Sign in with Hugging Face":
                        return "<p style='color: orange; font-weight: bold;'>⚠️ HF authentication required.</p>"

                    if not email or email.strip() == "":
                        return "<p style='color: red; font-weight: bold;'>Email address is required to receive benchmark results.</p>"

                    global current_data
                    batch_size = 32
                    result_msg, updated_data = submit_evaluation(model_name, email, batch_size, current_data, progress)
                    # Keep the module-level cache in sync with the leaderboard returned by the evaluation service.
                    current_data = updated_data

                    logging.info(f"Submission processed for model: {model_name} by user: {profile}")
                    return result_msg

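                # login_button is included in the event inputs so the handler receives the
                # current sign-in state along with the form fields.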
                submit_btn.click(
                    fn=handle_submit_evaluation,
                    inputs=[model_input, email_input, login_button],
                    outputs=[result_output]
                )

            with gr.Tab("Dataset Information"):
                dataset_table = create_dataset_tab()
                gr.Markdown("""
---
### Metrics Explanation:
- **Mean (Task)**: Average performance across all individual tasks
- **Mean (TaskType)**: Average performance by task category
- **Classification**: Performance on Turkish classification tasks
- **Clustering**: Performance on Turkish clustering tasks
- **Pair Classification**: Performance on pair classification tasks (such as NLI)
- **Retrieval**: Performance on information retrieval tasks
- **STS**: Performance on Semantic Textual Similarity tasks
- **Correlation**: Weighted average of correlation metrics for the NLI and STSB datasets
- **Parameters**: Number of model parameters
- **Embed Dim**: Embedding dimension size
- **Max Seq Length**: Maximum sequence length the model can process (0 = unlimited)
- **Vocab Size**: Size of the model's vocabulary

### About Mizan:
This leaderboard presents results from the **Mizan** benchmark, which evaluates embedding models
on Turkish language tasks across multiple domains, including:
- Text classification and sentiment analysis
- Information retrieval and search
- Semantic textual similarity
- Text clustering and pair classification

### Submit Your Model:
Use the **Submit** tab to submit your Turkish embedding model for evaluation.
Your request will be reviewed by administrators, and you'll receive email notifications about the progress.

### Contact:
For any questions or feedback, please contact info@newmind.ai

### Links:
- **GitHub**: [mteb/mteb v1.38.51](https://github.com/embeddings-benchmark/mteb/tree/1.38.51) - Mizan is currently based on MTEB v1.38.51 (MTEB v2.0.0 support coming soon)
""")

    return demo


def main():
    """Main entry point"""
    print("Starting Mizan Leaderboard...")

    demo = create_leaderboard_demo()
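    # Bind to all interfaces on port 7860, the default port expected by Hugging Face Spaces.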
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )


if __name__ == "__main__":
    main()