# Mizan / app.py — nmmursit, "Initial commit" (9a235dc), 5.84 kB
# NOTE(review): the lines above/below this header ("raw", "history", "blame")
# were Hugging Face Hub file-viewer chrome captured by the scrape, not code;
# preserved here as a comment so the module stays importable.
#!/usr/bin/env python3
"""
Mizan Leaderboard - Enhanced Version with Submit Functionality
Includes leaderboard display, model submission, and evaluation tracking
"""
import gradio as gr
from ui_components import (
create_leaderboard_tab, create_dataset_tab, create_submit_evaluation_tab
)
from data_processor import load_leaderboard_from_csv
from evaluation_service import submit_evaluation
# Global data storage
# Module-level cache of leaderboard rows. Populated once by
# create_leaderboard_demo() via load_leaderboard_from_csv() and read by the
# submission handler; it is not refreshed automatically after submissions.
current_data = None
def create_leaderboard_demo():
    """Build and return the Mizan leaderboard Gradio app.

    The app has three tabs: the leaderboard table, a model-submission form
    (gated behind Hugging Face login), and dataset information, plus a
    markdown footer explaining the metrics.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio interface.
    """
    global current_data

    # Load the leaderboard snapshot from the bundled CSV once at startup;
    # it is only refreshed manually when new evaluation results land.
    current_data = load_leaderboard_from_csv()

    with gr.Blocks(
        title="Mizan",
        theme=gr.themes.Soft()
    ) as demo:
        # Markdown content is kept at column 0 so it is not rendered as a
        # markdown code block.
        gr.Markdown("""
# Mizan Leaderboard
Performance comparison for Turkish embedding models
""")
        with gr.Tabs():
            # Tab 1: read-only leaderboard table.
            with gr.Tab("📊 Leaderboard"):
                leaderboard_table = create_leaderboard_tab(current_data)

            # Tab 2: model submission (requires HF authentication).
            with gr.Tab("🚀 Submit"):
                (model_input, email_input, submit_btn,
                 login_button, result_output) = create_submit_evaluation_tab()

                def handle_submit_evaluation(model_name, email, profile,
                                             progress=gr.Progress()):
                    """Validate the request and forward it to the evaluation service.

                    Args:
                        model_name: model identifier entered by the user.
                        email: address that receives benchmark results (required).
                        profile: OAuth profile from the login button; None when
                            unauthenticated, or the placeholder string
                            "Sign in with Hugging Face" in local development.
                        progress: Gradio progress tracker injected at call time.

                    Returns:
                        str: an HTML status message shown in the result pane.
                    """
                    import logging

                    # Authentication check: reject anonymous requests outright.
                    if profile is None:
                        logging.warning("Unauthorized submission attempt with no profile")
                        return "<p style='color: red; font-weight: bold;'>Authentication required. Please log in with your Hugging Face account.</p>"

                    # IMPORTANT: in local development Gradio passes the literal
                    # string "Sign in with Hugging Face" instead of a real
                    # profile — that is mock auth, so block the submission.
                    if isinstance(profile, str) and profile == "Sign in with Hugging Face":
                        return "<p style='color: orange; font-weight: bold;'>⚠️ HF authentication required.</p>"

                    # Email is required to deliver the benchmark results.
                    if not email or email.strip() == "":
                        return "<p style='color: red; font-weight: bold;'>Email address is required to receive benchmark results.</p>"

                    batch_size = 32  # Always use the default batch size.

                    # Evaluation is asynchronous: the data returned here is NOT
                    # used to refresh the leaderboard; the table is updated
                    # manually once results are available.
                    result_msg, _updated_data = submit_evaluation(
                        model_name, email, batch_size, current_data, progress
                    )
                    # Lazy %-style args avoid string formatting when INFO is off.
                    logging.info(
                        "Submission processed for model: %s by user: %s",
                        model_name, profile,
                    )
                    return result_msg

                submit_btn.click(
                    fn=handle_submit_evaluation,
                    inputs=[model_input, email_input, login_button],
                    outputs=[result_output],
                )

            # Tab 3: static dataset information.
            with gr.Tab("📊 Dataset Information"):
                dataset_table = create_dataset_tab()

        # Footer shown beneath the tabs. NOTE(review): the original paste had
        # its indentation stripped, so whether this sat inside the last tab or
        # at Blocks level is ambiguous — placed at Blocks level; confirm
        # against the rendered Space.
        gr.Markdown("""
---
### 📊 Metrics Explanation:
- **Mean (Task)**: Average performance across all individual tasks
- **Mean (TaskType)**: Average performance by task categories
- **Classification**: Performance on Turkish classification tasks
- **Clustering**: Performance on Turkish clustering tasks
- **Pair Classification**: Performance on pair classification tasks (like NLI)
- **Retrieval**: Performance on information retrieval tasks
- **STS**: Performance on Semantic Textual Similarity tasks
- **Correlation**: Weighted average of correlation metrics for NLI and STSB datasets
- **Parameters**: Number of model parameters
- **Embed Dim**: Embedding dimension size
- **Max Seq Length**: Maximum sequence length the model can process (0 = infinite/unlimited)
- **Vocab Size**: Size of the model's vocabulary
### 📖 About Mizan:
This leaderboard presents results from the **Mizan** benchmark, which evaluates embedding models
on Turkish language tasks across multiple domains including:
- Text classification and sentiment analysis
- Information retrieval and search
- Semantic textual similarity
- Text clustering and pair classification
### 🚀 Submit Your Model:
Use the **Submit** tab to submit your Turkish embedding model for evaluation.
Your request will be reviewed by administrators and you'll receive email notifications about the progress.
### Contact:
For any questions or feedback, please contact info@newmind.ai
### Links:
- **GitHub**: [mteb/mteb v1.38.51](https://github.com/embeddings-benchmark/mteb/tree/1.38.51) - Mizan is currently based on MTEB v1.38.51 (MTEB v2.0.0 support coming soon)
""")
    return demo
def main():
    """Main entry point: build the leaderboard app and serve it.

    Binds to all interfaces on port 7860 (the standard Hugging Face
    Spaces setup) without creating a public share link.
    """
    # Startup banner; emoji repaired from the UTF-8→cp1252 mojibake
    # ("πŸš€") present in the scraped copy of this file.
    print("🚀 Starting Mizan Leaderboard...")
    demo = create_leaderboard_demo()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()