Spaces:

auto-cap
/

MoE-CAP-Dashboard

Sleeping

App Files Community

AppleSwing committed on Nov 20

Commit

1d0cc7f

verified ·

1 Parent(s): 545e209

Update app.py

Browse files

Files changed (1) hide show

app.py +367 -102

app.py CHANGED Viewed

@@ -193,7 +193,6 @@ def load_from_dir(
         lower_selected = [x.lower() for x in selected_tasks]
         df = df[df["Dataset"].astype(str).str.lower().isin(lower_selected)]
     # Inference framework filter (Method)
     if selected_frameworks is not None:
         lower_selected = [str(x).lower() for x in selected_frameworks]
@@ -216,7 +215,6 @@ def load_from_dir(
     df = df.fillna("-")
     raw_models = set()
     for cell in df["Model"].tolist():
         if isinstance(cell, str) and "href" in cell:
             try:
@@ -236,11 +234,6 @@ def load_from_dir(
             links.append(str(name))
     models_str = ", ".join(links)
-    # summary_md = (
-    #     f"**Loaded {len(df)} result files from dataset `{dir_path}`.** \n"
-    #     f"**Models:** {models_str}"
-    # )
     table_html = df.to_html(escape=False, index=False, classes="metrics-table")
     return table_html
@@ -265,131 +258,403 @@ def auto_refresh_from_dir(
 # Gradio UI
 def build_app() -> gr.Blocks:
-    row_css = """
-    .gradio-container table.metrics-table th,
-    .gradio-container table.metrics-table td {
-        padding-top: 10px;
-        padding-bottom: 10px;
-        padding-left: 8px;
-        padding-right: 8px;
-        border: 1px solid #e5e7eb;
-    }
-    .gradio-container table.metrics-table {
-        border-collapse: collapse;
         width: 100%;
     }
     """
-    with gr.Blocks(title="MoE-CAP Dashboard", css=row_css) as demo:
-        gr.Markdown("# MoE-CAP Dashboard")
-        with gr.Row():
-            with gr.Column(scale=1):
                 gr.Markdown(
-                    "### Tasks\n"
-                    "- Mathematics Problem-Solving Performance — "
-                    "[**GSM8K**](https://arxiv.org/abs/2110-14168)\n\n"
-                    "- Long-Context Understanding — "
-                    "[**LongBench**](https://arxiv.org/abs/2412.15204)\n"
-                    "- Massive Multitask Language Understanding — "
-                    "[**MMLU**](https://arxiv.org/abs/2009.03300)\n"
-                    "- Mathematical Reasoning — "
-                    "[**NuminaMath**](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf)\n"
-                    "- Extreme Long-Context Evaluation — "
-                    "[**RULER**](https://arxiv.org/abs/2404.06654)\n\n"
-                    "### Columns and Metrics\n"
-                    "- End-to-End Latency (s)  \n"
-                    "- Batch Size  \n"
-                    "- GPU Type  \n"
-                    "- Accuracy (%)  \n"
-                    "- Cost ($)  \n"
-                    "- Decoding Throughput (tokens/s)  \n"
-                    "- Prefill Throughput (tokens/s)  \n"
-                    "- Prefill S-MBU (%)  \n"
-                    "- Prefill S-MFU (%)  \n"
-                    "- Decoding S-MBU (%)  \n"
-                    "- Decoding S-MFU (%)  \n"
-                    "- TTFT (s)  \n"
-                    "- TPOT (s)"
                 )
-            with gr.Column(scale=1):
-                dir_path = gr.State(RESULT_DIR)
-                # 1) Tasks filter
-                task_filter = gr.CheckboxGroup(
-                    label="Tasks",
-                    choices=[
-                        ("GSM8K", "gsm8k"),
-                        ("LongBench", "longbench"),
-                        ("MMLU", "mmlu"),
-                        ("NuminaMath", "numinamath"),
-                        ("RULER", "ruler")
-                    ],
-                    value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
-                )
-                # 2) Inference frameworks filter
-                framework_filter = gr.CheckboxGroup(
-                    label="Inference frameworks",
-                    choices=["sglang", "vllm"],
-                    value=["sglang", "vllm"],
-                )
-                # 3) Model types filter
-                model_type_filter = gr.CheckboxGroup(
-                    label="Model types",
-                    choices=["instruct", "thinking"],
-                    value=["instruct", "thinking"],
-                )
-                # 4) Precision filter
-                precision_filter = gr.CheckboxGroup(
-                    label="Precision",
-                    choices=["bfloat16", "fp8"],
-                    value=["bfloat16", "fp8"],
-                )
-        # summary_output = gr.Markdown(label="Directory Summary")
-        leaderboard_output = gr.HTML(label="Directory Metrics")
-        # demo.load(
-        #     fn=load_from_dir,
-        #     inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
-        #     outputs=[leaderboard_output],
-        # )
         demo.load(
-            fn=auto_refresh_from_dir,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         task_filter.change(
-            fn=load_from_dir,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         framework_filter.change(
-            fn=load_from_dir,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         model_type_filter.change(
-            fn=load_from_dir,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         precision_filter.change(
-            fn=load_from_dir,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         timer = gr.Timer(60.0)
         timer.tick(
-            fn=auto_refresh_from_dir,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )

         lower_selected = [x.lower() for x in selected_tasks]
         df = df[df["Dataset"].astype(str).str.lower().isin(lower_selected)]
     # Inference framework filter (Method)
     if selected_frameworks is not None:
         lower_selected = [str(x).lower() for x in selected_frameworks]
     df = df.fillna("-")
     raw_models = set()
     for cell in df["Model"].tolist():
         if isinstance(cell, str) and "href" in cell:
             try:
             links.append(str(name))
     models_str = ", ".join(links)
     table_html = df.to_html(escape=False, index=False, classes="metrics-table")
     return table_html
 # Gradio UI
 def build_app() -> gr.Blocks:
+    # Enhanced CSS with better layout and scrollable table
+    custom_css = """
+    /* Global container styling */
+    .gradio-container {
+        max-width: 100% !important;
+        padding: 0 !important;
+    }
+    /* Header styling */
+    .header-container {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        padding: 1.5rem 2rem;
+        margin: 0;
+        border-radius: 0;
+        color: white;
+        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+    }
+    .header-container h1 {
+        color: white !important;
+        margin: 0;
+        font-size: 2rem;
+        font-weight: 600;
+    }
+    .header-subtitle {
+        color: rgba(255,255,255,0.9);
+        margin-top: 0.5rem;
+        font-size: 0.95rem;
+    }
+    /* Main content area */
+    .main-content {
+        display: flex;
+        height: calc(100vh - 120px);
+        gap: 1rem;
+        padding: 1rem;
+        background: #f8f9fa;
+    }
+    /* Sidebar styling */
+    .sidebar-container {
+        background: white;
+        border-radius: 8px;
+        padding: 1.5rem;
+        box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+        overflow-y: auto;
+        max-height: 100%;
+        width: 350px;
+        flex-shrink: 0;
+    }
+    .sidebar-section {
+        margin-bottom: 1.5rem;
+    }
+    .sidebar-section h3 {
+        font-size: 1.1rem;
+        font-weight: 600;
+        color: #2d3748;
+        margin-bottom: 0.75rem;
+        padding-bottom: 0.5rem;
+        border-bottom: 2px solid #e2e8f0;
+    }
+    /* Filter styling */
+    .filter-group {
+        background: #f7fafc;
+        border-radius: 6px;
+        padding: 0.75rem;
+        margin-bottom: 1rem;
+    }
+    .filter-group label {
+        font-weight: 500;
+        color: #4a5568;
+        font-size: 0.9rem;
+        margin-bottom: 0.5rem;
+        display: block;
+    }
+    /* Table container */
+    .table-container {
+        flex: 1;
+        background: white;
+        border-radius: 8px;
+        padding: 1.5rem;
+        box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+        display: flex;
+        flex-direction: column;
+        min-width: 0;
+    }
+    /* Stats bar */
+    .stats-bar {
+        display: flex;
+        gap: 2rem;
+        padding: 1rem;
+        background: #f7fafc;
+        border-radius: 6px;
+        margin-bottom: 1rem;
+        align-items: center;
+    }
+    .stat-item {
+        display: flex;
+        flex-direction: column;
+    }
+    .stat-label {
+        font-size: 0.8rem;
+        color: #718096;
+        text-transform: uppercase;
+        letter-spacing: 0.05em;
+    }
+    .stat-value {
+        font-size: 1.5rem;
+        font-weight: 600;
+        color: #2d3748;
+    }
+    /* Scrollable table wrapper */
+    .table-wrapper {
+        flex: 1;
+        overflow: auto;
+        border: 1px solid #e2e8f0;
+        border-radius: 6px;
+        max-height: calc(100vh - 280px);
+    }
+    /* Table styling */
+    table.metrics-table {
         width: 100%;
+        border-collapse: separate;
+        border-spacing: 0;
+        font-size: 0.9rem;
+    }
+    table.metrics-table thead {
+        position: sticky;
+        top: 0;
+        background: linear-gradient(to bottom, #f7fafc, #edf2f7);
+        z-index: 10;
+    }
+    table.metrics-table th {
+        padding: 0.75rem;
+        text-align: left;
+        font-weight: 600;
+        color: #2d3748;
+        border-bottom: 2px solid #cbd5e0;
+        white-space: nowrap;
+        font-size: 0.85rem;
+        text-transform: uppercase;
+        letter-spacing: 0.05em;
+    }
+    table.metrics-table td {
+        padding: 0.75rem;
+        border-bottom: 1px solid #e2e8f0;
+        color: #4a5568;
+    }
+    table.metrics-table tbody tr:hover {
+        background-color: #f7fafc;
+        transition: background-color 0.2s;
+    }
+    table.metrics-table tbody tr:last-child td {
+        border-bottom: none;
+    }
+    /* Model links */
+    table.metrics-table a {
+        color: #4c6ef5;
+        text-decoration: none;
+        font-weight: 500;
+    }
+    table.metrics-table a:hover {
+        text-decoration: underline;
+    }
+    /* Empty state */
+    .empty-state {
+        display: flex;
+        flex-direction: column;
+        align-items: center;
+        justify-content: center;
+        height: 400px;
+        color: #718096;
+    }
+    .empty-state p {
+        font-size: 1.1rem;
+        margin-top: 1rem;
+    }
+    /* Responsive adjustments */
+    @media (max-width: 1024px) {
+        .main-content {
+            flex-direction: column;
+            height: auto;
+        }
+        .sidebar-container {
+            width: 100%;
+            max-height: none;
+        }
+        .table-wrapper {
+            max-height: 500px;
+        }
+    }
+    /* Checkbox group styling */
+    .gradio-checkbox-group {
+        display: flex;
+        flex-direction: column;
+        gap: 0.5rem;
+    }
+    .gradio-checkbox-group label {
+        display: flex;
+        align-items: center;
+        padding: 0.25rem;
+        border-radius: 4px;
+        transition: background-color 0.2s;
+    }
+    .gradio-checkbox-group label:hover {
+        background-color: #edf2f7;
+    }
+    /* Loading indicator */
+    .loading-indicator {
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        padding: 2rem;
+        color: #718096;
+    }
+    /* Hide Gradio footer */
+    footer {
+        display: none !important;
     }
     """
+    with gr.Blocks(title="MoE-CAP Dashboard", css=custom_css) as demo:
+        # Header
+        with gr.Row(elem_classes="header-container"):
+            with gr.Column():
                 gr.Markdown(
+                    """# 🚀 MoE-CAP Dashboard
+                    <div class="header-subtitle">Comprehensive Model Performance Metrics and Benchmarks</div>
+                    """,
+                    elem_classes="header-title"
                 )
+        # Main content area
+        with gr.Row(elem_classes="main-content"):
+            # Sidebar
+            with gr.Column(scale=1, elem_classes="sidebar-container"):
+                # Filters section
+                with gr.Group(elem_classes="sidebar-section"):
+                    gr.Markdown("### 🎯 Filters", elem_classes="filter-header")
+                    dir_path = gr.State(RESULT_DIR)
+                    # Task filter
+                    with gr.Group(elem_classes="filter-group"):
+                        task_filter = gr.CheckboxGroup(
+                            label="📊 Tasks",
+                            choices=[
+                                ("GSM8K", "gsm8k"),
+                                ("LongBench", "longbench"),
+                                ("MMLU", "mmlu"),
+                                ("NuminaMath", "numinamath"),
+                                ("RULER", "ruler")
+                            ],
+                            value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
+                        )
+                    # Framework filter
+                    with gr.Group(elem_classes="filter-group"):
+                        framework_filter = gr.CheckboxGroup(
+                            label="⚙️ Inference Frameworks",
+                            choices=["sglang", "vllm"],
+                            value=["sglang", "vllm"],
+                        )
+                    # Model type filter
+                    with gr.Group(elem_classes="filter-group"):
+                        model_type_filter = gr.CheckboxGroup(
+                            label="🤖 Model Types",
+                            choices=["instruct", "thinking"],
+                            value=["instruct", "thinking"],
+                        )
+                    # Precision filter
+                    with gr.Group(elem_classes="filter-group"):
+                        precision_filter = gr.CheckboxGroup(
+                            label="🎚️ Precision",
+                            choices=["bfloat16", "fp8"],
+                            value=["bfloat16", "fp8"],
+                        )
+                # Information section
+                with gr.Group(elem_classes="sidebar-section"):
+                    gr.Markdown("### 📖 About")
+                    gr.Markdown(
+                        """
+                        **Benchmarks:**
+                        - [GSM8K](https://arxiv.org/abs/2110.14168) - Math Problem-Solving
+                        - [LongBench](https://arxiv.org/abs/2412.15204) - Long-Context Understanding
+                        - [MMLU](https://arxiv.org/abs/2009.03300) - Multitask Understanding
+                        - [NuminaMath](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf) - Mathematical Reasoning
+                        - [RULER](https://arxiv.org/abs/2404.06654) - Extreme Long-Context
+                        **Key Metrics:**
+                        - E2E Latency, Throughput, Accuracy
+                        - S-MBU/S-MFU Performance
+                        - TTFT/TPOT Timing
+                        """,
+                        elem_classes="info-text"
+                    )
+            # Table area
+            with gr.Column(scale=3, elem_classes="table-container"):
+                # Stats summary (optional - you can populate this with actual stats)
+                with gr.Row(elem_classes="stats-bar", visible=False):
+                    with gr.Column(elem_classes="stat-item"):
+                        gr.Markdown('<div class="stat-label">Total Models</div><div class="stat-value">0</div>')
+                    with gr.Column(elem_classes="stat-item"):
+                        gr.Markdown('<div class="stat-label">Avg Accuracy</div><div class="stat-value">0%</div>')
+                    with gr.Column(elem_classes="stat-item"):
+                        gr.Markdown('<div class="stat-label">Best E2E</div><div class="stat-value">0s</div>')
+                # Scrollable table
+                with gr.Row():
+                    with gr.Column():
+                        gr.Markdown("### 📊 Performance Metrics")
+                        leaderboard_output = gr.HTML(
+                            label="Metrics Table",
+                            elem_classes="table-wrapper"
+                        )
+        # Wrap table HTML in scrollable div
+        def wrap_table_html(html):
+            if html and "table" in html:
+                return f'<div class="table-wrapper">{html}</div>'
+            return html
+        # Modified load function to wrap table
+        def load_from_dir_wrapped(*args, **kwargs):
+            result = load_from_dir(*args, **kwargs)
+            return wrap_table_html(result)
+        def auto_refresh_from_dir_wrapped(*args, **kwargs):
+            result = auto_refresh_from_dir(*args, **kwargs)
+            return wrap_table_html(result)
+        # Load initial data
         demo.load(
+            fn=auto_refresh_from_dir_wrapped,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
+        # Filter change handlers
         task_filter.change(
+            fn=load_from_dir_wrapped,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         framework_filter.change(
+            fn=load_from_dir_wrapped,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         model_type_filter.change(
+            fn=load_from_dir_wrapped,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
         precision_filter.change(
+            fn=load_from_dir_wrapped,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )
+        # Auto-refresh timer
         timer = gr.Timer(60.0)
         timer.tick(
+            fn=auto_refresh_from_dir_wrapped,
             inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter],
             outputs=[leaderboard_output],
         )