Tcid

Running

App Files Files Community

badaoui HF Staff committed on Oct 28

Commit

6caaf6d

1 Parent(s): d95b93e

new regressions panel

Browse files

Files changed (3) hide show

app.py +55 -4
data.py +121 -0
styles.css +48 -0

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pandas as pd
 import gradio as gr
 from gradio_toggle import Toggle
-from data import CIResults
 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
@@ -107,6 +107,46 @@ def get_description_text():
         msg.append("*(loading...)*")
     return "<br>".join(msg)
 # Load CSS from external file
 def load_css():
     try:
@@ -266,6 +306,13 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
             # Current view components
             with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
                 # Summary display (default view)
@@ -853,10 +900,14 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
             ],
         )
-    # Auto-update CI links when the interface loads
     demo.load(
-        fn=get_ci_links,
-        outputs=[ci_links_display]
     )

 import gradio as gr
 from gradio_toggle import Toggle
+from data import CIResults, find_new_regressions
 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
         msg.append("*(loading...)*")
     return "<br>".join(msg)
# Function to format new regressions for display
def get_regressions_text():
    """Build the Markdown shown in the "new regressions" panel.

    Regressions returned by ``find_new_regressions`` are grouped under a
    ``"<model> (<DEVICE> <gpu_type>)"`` heading.  At most five tests are
    listed per group; the remainder is summarised as "... and N more".

    Returns:
        A Markdown string.  When no regressions are reported a
        "No New Regressions" message is returned; if anything raises while
        computing or formatting, the error is logged and a fallback
        "Unable to load regression data" message is returned instead.
    """
    max_tests_per_group = 5
    try:
        regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)
        if not regressions:
            return "### 🎉 No New Regressions\nAll failures were present in the previous run."

        # Group test names by model and device
        grouped = {}
        for reg in regressions:
            key = f"{reg['model']} ({reg['device'].upper()} {reg['gpu_type']})"
            grouped.setdefault(key, []).append(reg['test'])

        # Format output
        lines = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)", ""]
        for key in sorted(grouped):
            # Sort so the truncated subset is stable between page loads
            # instead of depending on the order the upstream list was built in.
            tests = sorted(grouped[key])
            lines.append(f"**{key}:**")
            # Blank line + "- " items: real Markdown list syntax.  Single
            # newlines between plain text lines are not rendered as line
            # breaks by default, which would merge all tests into one paragraph.
            lines.append("")
            lines.extend(f"- {test}" for test in tests[:max_tests_per_group])
            hidden = len(tests) - max_tests_per_group
            if hidden > 0:
                lines.append(f"- ... and {hidden} more")
            lines.append("")
        return "\n".join(lines)
    except Exception as e:
        # Deliberate catch-all: the panel must never break page rendering.
        logger.error(f"Error getting regressions: {e}")
        return "### ⚠️ New Regressions\n*Unable to load regression data*"
 # Load CSS from external file
 def load_css():
     try:
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
+            # New Regressions Panel (at the top for visibility)
+            regressions_panel = gr.Markdown(
+                value=get_regressions_text(),
+                elem_classes=["regressions-panel"],
+                visible=True
+            )
             # Current view components
             with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
                 # Summary display (default view)
             ],
         )
+    # Auto-update CI links and regressions when the interface loads
+    def load_dashboard_data():
+        """Load both CI links and regressions data."""
+        return get_ci_links(), get_regressions_text()
     demo.load(
+        fn=load_dashboard_data,
+        outputs=[ci_links_display, regressions_panel]
     )

data.py CHANGED Viewed

@@ -431,6 +431,127 @@ def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_n
         return None
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe

         return None
def _coerce_failures_dict(raw) -> dict:
    """Return a stored failures cell as a dict, decoding JSON text if needed.

    Anything that is not a dict after decoding (NaN, None, malformed JSON,
    a JSON list, ...) is treated as "no failures recorded".
    """
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}
    return raw if isinstance(raw, dict) else {}


def _failure_lines(failures: dict, gpu_type: str) -> set[str]:
    """Collect the non-empty 'line' identifiers recorded for one GPU type."""
    entries = failures.get(gpu_type) or []
    if not isinstance(entries, (list, tuple)):
        return set()
    found = set()
    for entry in entries:
        line = entry.get('line', '') if isinstance(entry, dict) else ''
        if isinstance(line, str) and line:
            found.add(line)
    return found


def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
    """
    Compare the most recent run's failures against the previous run's to find
    new regressions.

    Both runs are read from ``historical_df`` (the two most recent values of
    its ``date`` column, rows indexed by lower-cased model name);
    ``current_df`` only supplies the model names to inspect, via its index.
    A model with no row on the previous date has all of its latest failures
    reported as new.

    Returns a list of dicts with:
    - model: model name (as it appears in ``current_df.index``)
    - test: short test name (the part after the last ``::``)
    - test_full: full test identifier
    - device: 'amd' or 'nvidia'
    - gpu_type: 'single' or 'multi'

    Entries are ordered by model (in ``current_df.index`` order), then device
    (amd, nvidia), then gpu_type (single, multi), then full test name.  An
    empty list is returned when either frame is empty or fewer than two dates
    are available.
    """
    if current_df.empty or historical_df.empty:
        return []

    # Get the two most recent dates
    available_dates = sorted(historical_df['date'].unique(), reverse=True)
    if len(available_dates) < 2:
        # Not enough history to compare
        return []
    today_date, yesterday_date = available_dates[:2]

    today_data = historical_df[historical_df['date'] == today_date]
    yesterday_data = historical_df[historical_df['date'] == yesterday_date]

    new_regressions = []
    for model_name in current_df.index:
        model_key = model_name.lower()

        today_rows = today_data[today_data.index == model_key]
        if today_rows.empty:
            continue
        today_row = today_rows.iloc[0]

        yesterday_rows = yesterday_data[yesterday_data.index == model_key]
        yesterday_row = None if yesterday_rows.empty else yesterday_rows.iloc[0]

        for device in ('amd', 'nvidia'):
            column = f'failures_{device}'
            today_failures = _coerce_failures_dict(today_row.get(column, {}))
            yesterday_failures = (
                _coerce_failures_dict(yesterday_row.get(column, {}))
                if yesterday_row is not None
                else {}
            )

            for gpu_type in ('single', 'multi'):
                new_tests = (
                    _failure_lines(today_failures, gpu_type)
                    - _failure_lines(yesterday_failures, gpu_type)
                )
                # Sorted so the result does not depend on set iteration order.
                for test_name in sorted(new_tests):
                    new_regressions.append({
                        'model': model_name,
                        'test': test_name.split('::')[-1],  # Short name
                        'test_full': test_name,  # Full name
                        'device': device,
                        'gpu_type': gpu_type,
                    })
    return new_regressions
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe

styles.css CHANGED Viewed

@@ -594,6 +594,54 @@ h1, h2, h3, p, .markdown {
     flex-direction: column !important;
 }
 /* Custom scrollbar for main content */
 .main-content {
     scrollbar-width: thin !important;

     flex-direction: column !important;
 }
/* New Regressions Panel: dark red card with a brown border and a soft red glow.
   NOTE(review): per the CSS cascade, !important author declarations outrank
   animation declarations, so the !important border / box-shadow below may
   keep the pulse-border animation from being visible — confirm in the
   target browsers. */
.regressions-panel {
    margin: 15px 0 !important;
    padding: 15px 20px !important;
    border: 2px solid #8b4513 !important;
    border-radius: 8px !important;
    background: linear-gradient(145deg, #2a1a1a, #1a0f0f) !important;
    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2) !important;
    animation: pulse-border 2s ease-in-out infinite !important;
}
/* Panel heading: bold orange monospace, laid out as a vertically centred flex row */
.regressions-panel h3 {
    display: flex !important;
    align-items: center !important;
    margin: 0 0 10px !important;
    color: #ffb86c !important;
    font-family: monospace !important;
    font-size: 16px !important;
    font-weight: 700 !important;
}
/* Panel body text (paragraphs and lists): white monospace with tight vertical spacing */
.regressions-panel p,
.regressions-panel ul,
.regressions-panel li {
    margin: 4px 0 !important;
    color: #fff !important;
    font-family: monospace !important;
    font-size: 13px !important;
    line-height: 1.6 !important;
}
/* Bold spans inside the panel are highlighted in red */
.regressions-panel strong {
    font-weight: 600 !important;
    color: #ff6b6b !important;
}
/* Pulse animation for new regressions: the border lightens and the glow
   strengthens at the midpoint, then both return to their resting values */
@keyframes pulse-border {
    from, to {
        box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2);
        border-color: #8b4513;
    }
    50% {
        box-shadow: 0 4px 16px rgba(255, 107, 107, 0.4);
        border-color: #b8621b;
    }
}
 /* Custom scrollbar for main content */
 .main-content {
     scrollbar-width: thin !important;