badaoui HF Staff committed on
Commit
6caaf6d
·
1 Parent(s): d95b93e

new regressions panel

Browse files
Files changed (3) hide show
  1. app.py +55 -4
  2. data.py +121 -0
  3. styles.css +48 -0
app.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  import gradio as gr
5
  from gradio_toggle import Toggle
6
 
7
- from data import CIResults
8
  from utils import logger
9
  from summary_page import create_summary_page
10
  from model_page import plot_model_stats
@@ -107,6 +107,46 @@ def get_description_text():
107
  msg.append("*(loading...)*")
108
  return "<br>".join(msg)
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  # Load CSS from external file
111
  def load_css():
112
  try:
@@ -266,6 +306,13 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
266
 
267
  # Main content area
268
  with gr.Column(scale=4, elem_classes=["main-content"]):
 
 
 
 
 
 
 
269
  # Current view components
270
  with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
271
  # Summary display (default view)
@@ -853,10 +900,14 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
853
  ],
854
  )
855
 
856
- # Auto-update CI links when the interface loads
 
 
 
 
857
  demo.load(
858
- fn=get_ci_links,
859
- outputs=[ci_links_display]
860
  )
861
 
862
 
 
4
  import gradio as gr
5
  from gradio_toggle import Toggle
6
 
7
+ from data import CIResults, find_new_regressions
8
  from utils import logger
9
  from summary_page import create_summary_page
10
  from model_page import plot_model_stats
 
107
  msg.append("*(loading...)*")
108
  return "<br>".join(msg)
109
 
110
+ # Function to format new regressions for display
111
def get_regressions_text():
    """Build the Markdown shown in the "new regressions" panel.

    Calls ``find_new_regressions`` with the current results and the historical
    data held by ``Ci_results``, groups the reported tests under a
    ``"<model> (<DEVICE> <gpu_type>)"`` heading, and lists at most five tests
    per group followed by a "... and N more" line when the group is larger.

    Returns:
        A Markdown string: a "no new regressions" notice when nothing is
        reported, the grouped listing otherwise, or a fallback notice if
        anything raises while the text is being built.
    """
    max_tests_per_group = 5
    try:
        regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)

        if not regressions:
            return "### 🎉 No New Regressions\nAll failures were present in the previous run."

        # Group test names by model and device/GPU configuration.
        grouped = {}
        for reg in regressions:
            key = f"{reg['model']} ({reg['device'].upper()} {reg['gpu_type']})"
            grouped.setdefault(key, []).append(reg['test'])

        lines = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)", ""]

        for key in sorted(grouped):
            tests = grouped[key]
            lines.append(f"**{key}:**")
            # Blank line + "- " items make a real Markdown list. Plain "•" lines
            # separated by single newlines are merged into one paragraph when
            # the Markdown component does not preserve line breaks.
            lines.append("")
            lines.extend(f"- {test}" for test in tests[:max_tests_per_group])
            if len(tests) > max_tests_per_group:
                lines.append(f"- ... and {len(tests) - max_tests_per_group} more")
            lines.append("")

        return "\n".join(lines)
    except Exception as e:
        # Best-effort panel: never let a data problem break the dashboard.
        logger.error(f"Error getting regressions: {e}")
        return "### ⚠️ New Regressions\n*Unable to load regression data*"
149
+
150
  # Load CSS from external file
151
  def load_css():
152
  try:
 
306
 
307
  # Main content area
308
  with gr.Column(scale=4, elem_classes=["main-content"]):
309
+ # New Regressions Panel (at the top for visibility)
310
+ regressions_panel = gr.Markdown(
311
+ value=get_regressions_text(),
312
+ elem_classes=["regressions-panel"],
313
+ visible=True
314
+ )
315
+
316
  # Current view components
317
  with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
318
  # Summary display (default view)
 
900
  ],
901
  )
902
 
903
+ # Auto-update CI links and regressions when the interface loads
904
def load_dashboard_data():
    """Return the CI links and the regressions Markdown, in that order.

    The tuple order has to line up with the ``outputs`` list passed to
    ``demo.load`` (CI links display first, regressions panel second).
    """
    ci_links = get_ci_links()
    regressions_markdown = get_regressions_text()
    return ci_links, regressions_markdown
907
+
908
  demo.load(
909
+ fn=load_dashboard_data,
910
+ outputs=[ci_links_display, regressions_panel]
911
  )
912
 
913
 
data.py CHANGED
@@ -431,6 +431,127 @@ def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_n
431
  return None
432
 
433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
435
  """Extract and process model data from DataFrame row."""
436
  # Handle missing values and get counts directly from dataframe
 
431
  return None
432
 
433
 
434
def _coerce_failures(raw) -> dict:
    """Turn a raw failures cell (dict, JSON string, NaN, None, ...) into a dict."""
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}
    # Missing cells come back as NaN/None, and JSON may decode to a non-dict.
    return raw if isinstance(raw, dict) else {}


def _failed_test_names(failures: dict, gpu_type: str) -> set[str]:
    """Collect the non-empty ``'line'`` values recorded for one GPU type."""
    entries = failures.get(gpu_type)
    if not isinstance(entries, (list, tuple)):
        return set()
    names = set()
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        name = entry.get('line', '')
        if isinstance(name, str) and name:
            names.add(name)
    return names


def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
    """
    Compare the failures of the most recent date in ``historical_df`` against
    those of the date right before it and report the ones that are new.

    Models are taken from ``current_df.index`` and looked up (lower-cased) in
    the index of ``historical_df``. A model without a row on the most recent
    date is skipped; a model without a row on the previous date has all of its
    latest failures reported as new.

    Returns a list of dicts with:
    - model: model name as it appears in ``current_df.index``
    - test: short test name (text after the last ``::``)
    - test_full: full test name
    - device: 'amd' or 'nvidia'
    - gpu_type: 'single' or 'multi'

    Entries are ordered by model (in ``current_df`` order), then device, then
    GPU type, then full test name, so the output is stable across runs.
    """
    if current_df.empty or historical_df.empty:
        return []

    # Get the two most recent dates
    available_dates = sorted(historical_df['date'].unique(), reverse=True)
    if len(available_dates) < 2:
        # Not enough history to compare
        return []

    today_data = historical_df[historical_df['date'] == available_dates[0]]
    yesterday_data = historical_df[historical_df['date'] == available_dates[1]]

    new_regressions = []
    for model_name in current_df.index:
        model_key = str(model_name).lower()

        today_rows = today_data[today_data.index == model_key]
        if today_rows.empty:
            continue
        today_row = today_rows.iloc[0]

        yesterday_rows = yesterday_data[yesterday_data.index == model_key]
        # An empty dict stands in for "no data yesterday"; it supports .get too.
        yesterday_row = yesterday_rows.iloc[0] if not yesterday_rows.empty else {}

        for device in ('amd', 'nvidia'):
            column = f'failures_{device}'
            today_failures = _coerce_failures(today_row.get(column))
            yesterday_failures = _coerce_failures(yesterday_row.get(column))

            for gpu_type in ('single', 'multi'):
                new_tests = (
                    _failed_test_names(today_failures, gpu_type)
                    - _failed_test_names(yesterday_failures, gpu_type)
                )
                # Sort: set iteration order is not stable between runs.
                for test_name in sorted(new_tests):
                    new_regressions.append({
                        'model': model_name,
                        'test': test_name.split('::')[-1],  # Short name
                        'test_full': test_name,  # Full name
                        'device': device,
                        'gpu_type': gpu_type,
                    })

    return new_regressions
553
+
554
+
555
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
556
  """Extract and process model data from DataFrame row."""
557
  # Handle missing values and get counts directly from dataframe
styles.css CHANGED
@@ -594,6 +594,54 @@ h1, h2, h3, p, .markdown {
594
  flex-direction: column !important;
595
  }
596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
  /* Custom scrollbar for main content */
598
  .main-content {
599
  scrollbar-width: thin !important;
 
594
  flex-direction: column !important;
595
  }
596
 
597
/* New Regressions Panel: highlighted box listing newly failing tests */
.regressions-panel {
    background: linear-gradient(145deg, #2a1a1a, #1a0f0f) !important;
    border: 2px solid #8B4513 !important;
    border-radius: 8px !important;
    padding: 15px 20px !important;
    margin: 15px 0px !important;
    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2) !important;
    animation: pulse-border 2s ease-in-out infinite !important;
}

/* Panel heading */
.regressions-panel h3 {
    color: #FFB86C !important;
    font-family: monospace !important;
    font-size: 16px !important;
    font-weight: bold !important;
    margin: 0 0 10px 0 !important;
    display: flex !important;
    align-items: center !important;
}

/* Body text and list items */
.regressions-panel p,
.regressions-panel ul,
.regressions-panel li {
    color: #FFFFFF !important;
    font-family: monospace !important;
    font-size: 13px !important;
    line-height: 1.6 !important;
    margin: 4px 0 !important;
}

/* Bold group labels */
.regressions-panel strong {
    color: #FF6B6B !important;
    font-weight: 600 !important;
}

/* Pulse animation for new regressions */
@keyframes pulse-border {
    0%, 100% {
        border-color: #8B4513;
        box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2);
    }
    50% {
        border-color: #B8621B;
        box-shadow: 0 4px 16px rgba(255, 107, 107, 0.4);
    }
}

/* Respect the user's reduced-motion preference: keep the static border and
   shadow but drop the endless pulse. */
@media (prefers-reduced-motion: reduce) {
    .regressions-panel {
        animation: none !important;
    }
}
644
+
645
  /* Custom scrollbar for main content */
646
  .main-content {
647
  scrollbar-width: thin !important;