# Mizan / app.py — nmmursit, "Initial commit" (9a235dc), 5.84 kB
# NOTE(review): the lines above/below this header ("raw", "history", "blame")
# were Hugging Face Hub file-viewer chrome captured by the scrape, not code;
# preserved here as a comment so the module stays importable.
#!/usr/bin/env python3
"""
Mizan Leaderboard - Enhanced Version with Submit Functionality
Includes leaderboard display, model submission, and evaluation tracking
"""
import gradio as gr
from ui_components import (
create_leaderboard_tab, create_dataset_tab, create_submit_evaluation_tab
)
from data_processor import load_leaderboard_from_csv
from evaluation_service import submit_evaluation
# Global data storage
# Module-level cache of leaderboard rows. Populated once by
# create_leaderboard_demo() via load_leaderboard_from_csv() and read by the
# submission handler; it is not refreshed automatically after submissions.
current_data = None
def create_leaderboard_demo():
    """Build and return the Mizan leaderboard Gradio app.

    The app has three tabs: the leaderboard table, a model-submission form
    (gated behind Hugging Face login), and dataset information, plus a
    markdown footer explaining the metrics.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio interface.
    """
    global current_data

    # Load the leaderboard snapshot from the bundled CSV once at startup;
    # it is only refreshed manually when new evaluation results land.
    current_data = load_leaderboard_from_csv()

    with gr.Blocks(
        title="Mizan",
        theme=gr.themes.Soft()
    ) as demo:
        # Markdown content is kept at column 0 so it is not rendered as a
        # markdown code block.
        gr.Markdown("""
# Mizan Leaderboard
Performance comparison for Turkish embedding models
""")
        with gr.Tabs():
            # Tab 1: read-only leaderboard table.
            with gr.Tab("📊 Leaderboard"):
                leaderboard_table = create_leaderboard_tab(current_data)

            # Tab 2: model submission (requires HF authentication).
            with gr.Tab("🚀 Submit"):
                (model_input, email_input, submit_btn,
                 login_button, result_output) = create_submit_evaluation_tab()

                def handle_submit_evaluation(model_name, email, profile,
                                             progress=gr.Progress()):
                    """Validate the request and forward it to the evaluation service.

                    Args:
                        model_name: model identifier entered by the user.
                        email: address that receives benchmark results (required).
                        profile: OAuth profile from the login button; None when
                            unauthenticated, or the placeholder string
                            "Sign in with Hugging Face" in local development.
                        progress: Gradio progress tracker injected at call time.

                    Returns:
                        str: an HTML status message shown in the result pane.
                    """
                    import logging

                    # Authentication check: reject anonymous requests outright.
                    if profile is None:
                        logging.warning("Unauthorized submission attempt with no profile")
                        return "<p style='color: red; font-weight: bold;'>Authentication required. Please log in with your Hugging Face account.</p>"

                    # IMPORTANT: in local development Gradio passes the literal
                    # string "Sign in with Hugging Face" instead of a real
                    # profile — that is mock auth, so block the submission.
                    if isinstance(profile, str) and profile == "Sign in with Hugging Face":
                        return "<p style='color: orange; font-weight: bold;'>⚠️ HF authentication required.</p>"

                    # Email is required to deliver the benchmark results.
                    if not email or email.strip() == "":
                        return "<p style='color: red; font-weight: bold;'>Email address is required to receive benchmark results.</p>"

                    batch_size = 32  # Always use the default batch size.

                    # Evaluation is asynchronous: the data returned here is NOT
                    # used to refresh the leaderboard; the table is updated
                    # manually once results are available.
                    result_msg, _updated_data = submit_evaluation(
                        model_name, email, batch_size, current_data, progress
                    )
                    # Lazy %-style args avoid string formatting when INFO is off.
                    logging.info(
                        "Submission processed for model: %s by user: %s",
                        model_name, profile,
                    )
                    return result_msg

                submit_btn.click(
                    fn=handle_submit_evaluation,
                    inputs=[model_input, email_input, login_button],
                    outputs=[result_output],
                )

            # Tab 3: static dataset information.
            with gr.Tab("📊 Dataset Information"):
                dataset_table = create_dataset_tab()

        # Footer shown beneath the tabs. NOTE(review): the original paste had
        # its indentation stripped, so whether this sat inside the last tab or
        # at Blocks level is ambiguous — placed at Blocks level; confirm
        # against the rendered Space.
        gr.Markdown("""
---
### 📊 Metrics Explanation:
- **Mean (Task)**: Average performance across all individual tasks
- **Mean (TaskType)**: Average performance by task categories
- **Classification**: Performance on Turkish classification tasks
- **Clustering**: Performance on Turkish clustering tasks
- **Pair Classification**: Performance on pair classification tasks (like NLI)
- **Retrieval**: Performance on information retrieval tasks
- **STS**: Performance on Semantic Textual Similarity tasks
- **Correlation**: Weighted average of correlation metrics for NLI and STSB datasets
- **Parameters**: Number of model parameters
- **Embed Dim**: Embedding dimension size
- **Max Seq Length**: Maximum sequence length the model can process (0 = infinite/unlimited)
- **Vocab Size**: Size of the model's vocabulary
### 📖 About Mizan:
This leaderboard presents results from the **Mizan** benchmark, which evaluates embedding models
on Turkish language tasks across multiple domains including:
- Text classification and sentiment analysis
- Information retrieval and search
- Semantic textual similarity
- Text clustering and pair classification
### 🚀 Submit Your Model:
Use the **Submit** tab to submit your Turkish embedding model for evaluation.
Your request will be reviewed by administrators and you'll receive email notifications about the progress.
### Contact:
For any questions or feedback, please contact info@newmind.ai
### Links:
- **GitHub**: [mteb/mteb v1.38.51](https://github.com/embeddings-benchmark/mteb/tree/1.38.51) - Mizan is currently based on MTEB v1.38.51 (MTEB v2.0.0 support coming soon)
""")
    return demo
def main():
    """Main entry point: build the leaderboard app and serve it.

    Binds to all interfaces on port 7860 (the standard Hugging Face
    Spaces setup) without creating a public share link.
    """
    # Startup banner; emoji repaired from the UTF-8→cp1252 mojibake
    # ("πŸš€") present in the scraped copy of this file.
    print("🚀 Starting Mizan Leaderboard...")
    demo = create_leaderboard_demo()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()