open_universal_arabic_quranic_asr_leaderboard

Running

open_universal_arabic_quranic_asr_leaderboard

File size: 5,414 Bytes

ef32dc8
 
d6c53fc
8be74ba
0991691
5f8d3f6
8be74ba
9329bfd
8be74ba
bba4630
0f588e7
6f39ee0
 
 
0f588e7
8be74ba
 
b4629b3
8be74ba
6a52628
8be74ba
cc9c97c
0f588e7
8be74ba
 
 
9329bfd
 
 
8be74ba
 
0f588e7
 
8be74ba
 
 
 
 
 
6f39ee0
8be74ba
 
d6c53fc
500c19c
 
946805d
 
 
 
8be74ba
 
 
f71d002
8be74ba
 
859f5fd
8be74ba
a4430c5
dde4f6f
 
 
 
a4430c5
f7ce062
8afc8d1
a4430c5
 
8be74ba
 
 
 
e16bf2b
8be74ba
ef32dc8
 
 
8be74ba
 
 
 
 
 
 
 
ef32dc8
8be74ba
 
ef32dc8
8be74ba
ef32dc8
8be74ba
 
 
 
 
b0a4174
 
ef32dc8
 
 
8be74ba
 
 
ef32dc8
 
 
 
ec69747

import gradio as gr
import pandas as pd
from utils_display import make_clickable_model

# Banner image shown at the top of the page.
# Files stored in a Hub repository are served from
# ".../resolve/<revision>/<path>"; without the "resolve" segment the URL does
# not address the raw file, so the <img> tag would fail to load the picture.
banner_url = "https://huggingface.co/spaces/deepdml/open_universal_arabic_quranic_asr_leaderboard/resolve/main/banner.png"
# HTML wrapper that centres the banner inside a flex container.
BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 10vw; max-width: 600px;"> </div>'

# Markdown/HTML introduction rendered directly under the banner.
# The text is one logical string; it is split at each "<br>" so that every
# rendered line of the introduction sits on its own source line.
INTRODUCTION_TEXT = (
    "📖**Open Universal Arabic Quranic ASR Leaderboard**📖 benchmarks multi-dialect Arabic Quranic ASR models on various multi-dialect datasets."
    "<br>Apart from the WER%/CER% for each test set, we also report the Average WER%/CER% and rank the models based on the Average WER, from lowest to highest."
    "<br>To reproduce the benchmark numbers and request a model that is not listed, you can launch an issue/PR in our [GitHub repo](https://github.com/Natural-Language-Processing-Elm/open_universal_arabic_asr_leaderboard)😊."
    "<br>For more detailed analysis such as models' robustness, speaker adaption, model efficiency and memory usage, please check our [paper](https://arxiv.org/pdf/2412.13788)."
)

# BibTeX snippet offered in the "Citation" accordion at the bottom of the page.
# The value starts with an empty line and ends with a newline.
CITATION_BUTTON_TEXT = (
    "\n"
    "@article{wang2024open,\n"
    "  title={Open Universal Arabic Quranic ASR Leaderboard},\n"
    "  author={Jimenez, David},\n"
    "  year={2025}\n"
    "}\n"
)

# Markdown body of the "Metrics" tab: metric definitions, reproduction notes,
# the benchmark dataset table and a pointer to the paper.
# The "\n" escapes at the start of some lines insert an extra blank line so
# that Markdown renders the following sentence as its own paragraph.
# The sample count uses a comma as thousands separator: it is an integer
# number of samples, and "23.473" would read as a decimal fraction.
METRICS_TAB_TEXT = """
## Metrics
We report both the Word Error Rate (WER) and Character Error Rate (CER).
## Reproduction
The Open Universal Arabic Quranic ASR Leaderboard will be a continuous benchmark project. 
\nWe open-source the evaluation scripts at our [GitHub repo](https://github.com/Natural-Language-Processing-Elm/open_universal_arabic_asr_leaderboard).
\nPlease launch a discussion in our GitHub repo to let us know if you want to learn about the performance of a new model.

## Benchmark datasets
| Test Set                                                                                        | Num Samples |
|-------------------------------------------------------------------------------------------------|-------------|
| [Tarteel AI's EveryAyah](https://huggingface.co/datasets/tarteel-ai/everyayah)                  | 23,473      |

## In-depth Analysis
We also provide a comprehensive analysis of the models' robustness, speaker adaptation, inference efficiency and memory consumption.
\nPlease check our [paper](https://arxiv.org/pdf/2412.13788) to learn more.
"""


def styled_message(message):
    """Return *message* wrapped in a centred, green, 20px HTML paragraph.

    The message is interpolated as-is (no HTML escaping is performed).
    """
    inline_css = "color: green; font-size: 20px; text-align: center;"
    return "<p style='{}'>{}</p>".format(inline_css, message)

# Changelog text rendered as Markdown beneath the tabs.
UPDATES = "Oct 20th 2025:[Created repo]<br>"

# Leaderboard scores, written one model per row and transposed into the
# column-oriented mapping (column name -> list of values) used below.
# With a single test set listed, the "Average" columns carry the same numbers
# as the "Tarteel" columns.
results = dict(zip(
    ("Model", "Average WER⬇️", "Average CER", "Tarteel WER", "Tarteel CER"),
    map(list, zip(
        ("tarteel-ai/whisper-tiny-ar-quran", 4.99, 1.82, 4.99, 1.82),
        ("Habib-HF/tarbiyah-ai-whisper-medium-merged", 81.59, 76.65, 81.59, 76.65),
        ("nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0", 1.33, 0.49, 1.33, 0.49),
        ("IbrahimSalah/Wav2vecLarge_quran_syllables_recognition", 281.27, 73.18, 281.27, 73.18),
        ("facebook/mms-1b-all", 48.13, 16.58, 48.13, 16.58),
        ("facebook/hf-seamless-m4t-medium", 25.77, 15.38, 25.77, 15.38),
        ("facebook/seamless-m4t-v2-large", 26.2, 18.08, 26.2, 18.08),
        ("IJyad/whisper-large-v3-Tarteel", 25.39, 17.75, 25.39, 17.75),
    )),
))

# Rank models by average WER, best (lowest) first. Row labels keep their
# pre-sort values; the index is not reset.
# NOTE: model names are kept as plain strings; they are not turned into
# clickable links here.
original_df = pd.DataFrame(results).sort_values(by="Average WER⬇️")

# Per-column datatypes handed to the Gradio table: one text column followed
# by the four numeric score columns.
TYPES = ["str"] + ["number"] * 4

LEADERBOARD_CSS = """
html, body {
  overflow-y: auto !important;
}

#leaderboard-table th .header-content {
    min-width: 150px;
    white-space: nowrap;
}
"""

def request_model(model_text):
    """Return the styled reply shown after the "Request" button is pressed.

    ``model_text`` is the content of the model-name textbox; it is accepted
    because the click handler passes it in, but it is not used: the reply is
    always the same pointer to the GitHub repository.
    """
    reply = "🤗 Please launch a discussion in our GitHub repo, thank you. 🤗"
    return styled_message(reply)

# Page layout: banner, introduction, three tabs (leaderboard, metrics, model
# request), the changelog line and a collapsible citation box.
with gr.Blocks(fill_width=False, fill_height=False, css=LEADERBOARD_CSS) as demo:
    gr.HTML(BANNER, elem_id="banner")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # NOTE(review): the three tabs below all use elem_id="od-benchmark-tab-table";
    # element ids are normally expected to be unique within a page - confirm
    # whether any styling relies on this before changing it.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
            # Read-only table of the pre-sorted results; its elem_id is the one
            # targeted by the "#leaderboard-table" rule in LEADERBOARD_CSS.
            leaderboard_table = gr.Dataframe(
                value=original_df,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )

        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=2):
            with gr.Column():
                gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
                model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
                # Empty Markdown area that receives the reply from request_model.
                mdw_submission_result = gr.Markdown()
                btn_submit = gr.Button(value="🚀 Request")
                # Clicking sends the textbox content to request_model and shows
                # its return value in mdw_submission_result.
                btn_submit.click(request_model, [model_name_textbox], mdw_submission_result)

    gr.Markdown(UPDATES, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            # Collapsed by default; the textbox offers a copy button for the
            # BibTeX snippet.
            gr.Textbox(
                value=CITATION_BUTTON_TEXT, lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )

# Start the app at module level (runs whenever this file is executed).
# allowed_paths names banner.png as a file Gradio may serve; ssr_mode=False
# turns server-side rendering off.
demo.launch(allowed_paths=["banner.png"], ssr_mode=False)