Spaces:
Running
Running
| import json | |
| import os | |
| import re | |
| from datetime import datetime | |
| from typing import Tuple | |
| import pandas as pd | |
| from bs4 import BeautifulSoup | |
def format_datetime(dt_str: str) -> str:
    """Render an ISO-8601 timestamp as 'YYYY-MM-DD HH:MM:SS' for display.

    Replaces the 'T' separator with a space and drops any '+HH:MM'
    timezone suffix.

    :param dt_str: String representing a datetime in ISO format
    :return: Formatted datetime string
    """
    readable, _, _ = dt_str.replace("T", " ").partition("+")
    return readable
def read_json_line_by_line(file_path, commit_hash=None):
    """Parse a JSON-Lines file, optionally keeping only one commit's records.

    Each line is decoded independently; lines that are not valid JSON are
    reported and skipped rather than aborting the whole read.

    :param file_path: Path to the JSON file
    :param commit_hash: Optional commit hash to filter data
    :return: List of parsed JSON objects
    """
    records = []
    with open(file_path, "r") as handle:
        for raw_line in handle:
            try:
                record = json.loads(raw_line.strip())
            except json.JSONDecodeError:
                print(f"Skipping invalid JSON in {file_path}: {raw_line}")
                continue
            # Keep everything when no hash filter was requested.
            if commit_hash is None or record.get("commit_hash") == commit_hash:
                records.append(record)
    return records
def calculate_change(new: float, old: float, metric_name: str) -> Tuple[float, str]:
    """Return the metric delta (new - old) paired with a status emoji.

    For most metrics an increase is good (green); for WER-style metrics
    lower is better, so the emoji mapping is inverted. Deltas smaller
    than 1 in magnitude are treated as "no change".
    """
    delta = new - old
    if abs(delta) < 1:
        return (delta, "βοΈ")
    lower_is_better = "wer" in metric_name.lower()
    # An increase is an improvement exactly when higher is better.
    if (delta > 0) != lower_is_better:
        return (delta, "π’")
    return (delta, "β")
def has_changes(config, prev_dict, curr_dict):
    """Return True if any tracked metric moved by at least 1 between runs."""
    current = curr_dict[config]
    previous = prev_dict[config]
    tracked = ("speed", "tokens_per_second", "average_wer", "qoi")
    # A metric only counts when present in both snapshots; 1 is the
    # significance threshold used throughout the report.
    return any(
        abs(current[metric] - previous[metric]) >= 1
        for metric in tracked
        if metric in current and metric in previous
    )
def format_metrics_table(config, prev_dict, curr_dict, improved, regressed):
    """Build a fixed-width metrics table for one config and tally movements.

    Only metrics whose delta is at least 1 are listed. For every listed
    metric, the ``improved``/``regressed`` counters are incremented in
    place; WER is inverted because a lower WER is better.

    :param config: Key identifying the configuration in both dicts.
    :param prev_dict: Mapping of config -> previous metrics dict.
    :param curr_dict: Mapping of config -> current metrics dict.
    :param improved: Counter dict keyed by metric key, mutated in place.
    :param regressed: Counter dict keyed by metric key, mutated in place.
    :return: Markdown code-fenced table string.
    """
    curr = curr_dict[config]
    prev = prev_dict[config]
    metrics = [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]
    table = "```\nMetric Previous Current Change\n--------------------------------\n"
    for metric_name, key in metrics:
        # Skip metrics missing from either snapshot.
        if key not in curr or key not in prev:
            continue
        curr_val = curr[key]
        prev_val = prev[key]
        pct_change, _ = calculate_change(curr_val, prev_val, metric_name)
        # Only show metrics with changes at or above the 1-unit threshold.
        if abs(pct_change) < 1:
            continue
        table += f"{metric_name:<9} {prev_val:<11.2f} {curr_val:<10.2f} {pct_change:.2f}\n"
        # Higher is better for every metric except WER, where an increase
        # means a regression.
        got_better = (pct_change > 0) == ("wer" not in metric_name.lower())
        if got_better:
            improved[key] += 1
        else:
            regressed[key] += 1
    table += "```"
    return table
def extract_status_and_os(cell_value):
    """
    Extract status and OS versions from a cell, handling both HTML and plain text.

    Cells contain one or more entries separated by "<p>" tags; each entry is
    either a plain success line ("β  <OS> <version>") or a warning rendered
    as HTML containing "β οΈ".

    :param cell_value: Raw cell content (HTML fragment or plain text).
    :return: List of tuples [(status, os_version), ...]; empty for
        "Not Supported" cells or when no OS version can be found.
    """
    results = []
    cell_value = str(cell_value)
    # Plain-text marker: configuration was never tested.
    if cell_value == "Not Supported":
        return results
    os_pattern = re.compile(r"(iOS|iPadOS|macOS)\s+[\d.]+")
    # Split the cell into parts (first element and subsequent <p> elements)
    for part in cell_value.split("<p>"):
        # BUG FIX: the original used part.strip("</p>"), but str.strip()
        # treats its argument as a character SET, stripping any of
        # '<', '/', 'p', '>' from BOTH ends and potentially eating real
        # content. Remove only the literal closing tag.
        if part.endswith("</p>"):
            part = part[: -len("</p>")]
        if not part:
            continue
        if "β οΈ" in part:
            # Warning entries are HTML; parse to get the visible text, then
            # pull the OS version out of it.
            soup = BeautifulSoup(part, "html.parser")
            os_match = os_pattern.search(soup.get_text())
            if os_match:
                results.append(("β οΈ", os_match.group(0)))
        else:
            # For success cases, the OS version is directly in the text.
            os_match = os_pattern.search(part)
            if os_match:
                results.append(("β ", os_match.group(0)))
    return results
def escape_string(s: str) -> str:
    """Escape a string to be used as a value in JSON.

    Backslashes, double quotes, and CR/LF are replaced with their escaped
    forms in a single character-by-character pass.
    """
    escape_map = {
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
    }
    return "".join(escape_map.get(ch, ch) for ch in s)
def analyze_support_changes(prev_csv, curr_csv):
    """Diff device-support matrices between two CSV snapshots.

    Each CSV has models as rows (first column, used as the index) and
    devices as the remaining columns; cells encode per-OS pass/warning
    status parsed by extract_status_and_os().

    :param prev_csv: Path to the previous support matrix CSV.
    :param curr_csv: Path to the current support matrix CSV.
    :return: Tuple (fixed_errors, new_errors, new_configs, needs_alert):
        the first three are lists of (model, device, os_version) tuples;
        needs_alert is True when the current device count dropped below
        90% of the previous one.
    """
    # Read CSV files, using the first column (model name) as the index.
    prev_df = pd.read_csv(prev_csv)
    prev_df.set_index(prev_df.columns[0], inplace=True)
    curr_df = pd.read_csv(curr_csv)
    curr_df.set_index(curr_df.columns[0], inplace=True)
    # BUG FIX: after set_index() the model column is removed from .columns,
    # so every remaining column is a device. The original still sliced
    # columns[1:] (and started the cell loops at col_idx 1), silently
    # dropping the first device from the comparison.
    prev_devices = sorted(prev_df.columns)
    curr_devices = sorted(curr_df.columns)
    # Alert if less than 90% of previous devices are still covered.
    device_ratio = len(curr_devices) / len(prev_devices) if prev_devices else 1
    needs_alert = device_ratio < 0.9

    def _status_map(df):
        """Flatten a support matrix into {(model, device, os_version): status}."""
        status = {}
        for model, row in df.iterrows():
            for device, cell_value in row.items():
                for st, os_version in extract_status_and_os(cell_value):
                    status[(model, device, os_version)] = st
        return status

    prev_status = _status_map(prev_df)
    curr_status = _status_map(curr_df)
    # Configurations present now but absent from the previous snapshot.
    new_configs = [cfg for cfg in curr_status if cfg not in prev_status]
    # Status flips among configurations that exist in both datasets.
    fixed_errors = []
    new_errors = []
    for config in set(prev_status) & set(curr_status):
        if prev_status[config] == "β οΈ" and curr_status[config] == "β ":
            fixed_errors.append(config)
        elif prev_status[config] == "β " and curr_status[config] == "β οΈ":
            new_errors.append(config)
    return fixed_errors, new_errors, new_configs, needs_alert
def generate_report():
    """Build the WhisperKit dataset update report and emit it for CI.

    Compares the previous snapshot (dashboard_data/) against the current
    one (report_data/): per-(model, device, os) performance metrics,
    release lists, and device-support changes. Writes two values to
    $GITHUB_OUTPUT:

    * ``slack_message_payload`` -- Slack Block Kit JSON for the report.
    * ``performance_message``   -- escaped per-configuration metrics text.
    """
    # Load current version data first to get the commit hashes to compare.
    with open("report_data/version.json", "r") as f:
        curr_version = json.load(f)
    # The releases array is ordered oldest -> newest; the last two entries
    # are the (previous, current) commit pair.
    releases = curr_version.get("releases", [])
    if len(releases) >= 2:
        curr_commit_hash = releases[-1]  # latest commit
        prev_commit_hash = releases[-2]  # previous commit
    else:
        curr_commit_hash = releases[-1] if releases else ""
        prev_commit_hash = ""
    # Load and filter performance data by commit hash. Convention used
    # throughout this file: dashboard_data/ is the previous snapshot and
    # report_data/ is the current one.
    prev_perf_data = read_json_line_by_line(
        "dashboard_data/performance_data.json", commit_hash=prev_commit_hash
    )
    curr_perf_data = read_json_line_by_line(
        "report_data/performance_data.json", commit_hash=curr_commit_hash
    )
    prev_dict = {(d["model"], d["device"], d["os"]): d for d in prev_perf_data}
    curr_dict = {(d["model"], d["device"], d["os"]): d for d in curr_perf_data}
    common_configs = set(curr_dict.keys()) & set(prev_dict.keys())
    # Load previous version data (the current one was loaded above).
    with open("dashboard_data/version.json", "r") as f:
        prev_version = json.load(f)
    prev_releases = set(prev_version.get("releases", []))
    curr_releases = set(curr_version.get("releases", []))
    new_releases = curr_releases - prev_releases
    removed_releases = prev_releases - curr_releases
    # Per-metric improvement/regression counters, mutated in place by
    # format_metrics_table().
    improved_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}
    regressed_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}
    new_data_points = len(set(curr_dict.keys()) - set(prev_dict.keys()))
    # Analyze support changes.
    # BUG FIX: the arguments were swapped. analyze_support_changes takes
    # (prev_csv, curr_csv) and dashboard_data/ is the *previous* snapshot,
    # matching the performance/version loading above.
    fixed_errors, new_errors, new_configs, needs_alert = analyze_support_changes(
        "dashboard_data/support_data.csv", "report_data/support_data.csv"
    )
    # Create Slack blocks.
    # NOTE(review): "%-d" (day without zero padding) is a glibc extension;
    # it fails on Windows strftime. Fine for Linux CI runners.
    current_time = datetime.now().strftime("%B %-d, %Y %H:%M:%S")
    prev_release_tag = (
        prev_version["versions"][-1] if prev_version["versions"] else "N/A"
    )
    # BUG FIX: guard the current tag like the previous one -- the original
    # raised IndexError when curr_version["versions"] was empty.
    curr_release_tag = (
        curr_version["versions"][-1] if curr_version["versions"] else "N/A"
    )
    slack_blocks = {
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": "π WhisperKit Dataset Update Report π",
                    "emoji": True,
                },
            },
            {
                "type": "context",
                "elements": [{"text": f"*{current_time}*", "type": "mrkdwn"}],
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {"type": "mrkdwn", "text": "βΉοΈ *CURRENT VERSION INFO* βΉοΈ"},
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Last Modified:* `{format_datetime(curr_version['last_modified'])}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Dataset SHA:* `{curr_version['sha']}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Current Releases:* {', '.join(f'`{r}`' for r in curr_version['releases'])}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Current Release Tag:* `{curr_release_tag}`",
                },
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "π *SUMMARY OF PERFORMANCE UPDATES* π",
                },
            },
        ]
    }
    # Add release information.
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Added Releases:* {', '.join(sorted(new_releases)) if new_releases else 'None'}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Removed Releases:* {', '.join(sorted(removed_releases)) if removed_releases else 'None'}",
                },
            },
        ]
    )
    if prev_release_tag != curr_release_tag:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Release Tag Change:* `{prev_release_tag}` β `{curr_release_tag}`",
                },
            }
        )
    # Spacer sections ("\n") keep visual separation in the Slack rendering.
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *New Data Points:* `{new_data_points}` new configurations",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
        ]
    )
    # Create performance text as a single mrkdwn string.
    # BUG FIX: initialize unconditionally -- the original only assigned it
    # inside the `if common_configs:` branch, causing a NameError at the
    # GITHUB_OUTPUT write below when there were no common configurations.
    performance_text = ""
    if common_configs:
        performance_text = "π‘ *Performance Updates* π‘\n\n"
        # Group by model for better organization.
        models = sorted(set(model for model, _, _ in common_configs))
        for model in models:
            model_configs = sorted([cfg for cfg in common_configs if cfg[0] == model])
            for config in model_configs:
                device_info = f"*{model}* ({config[2]})"
                if not has_changes(config, prev_dict, curr_dict):
                    # If no changes, just add the model with a checkmark.
                    performance_text += f"{device_info} β \n\n"
                else:
                    # If there are changes, show the metrics.
                    performance_text += f"{device_info}\n"
                    table = format_metrics_table(
                        config, prev_dict, curr_dict, improved_metrics, regressed_metrics
                    )
                    performance_text += table
                    performance_text += "\n\n"
    # Add metrics summary (counters were filled while building the tables).
    for metric_name, key in [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *{metric_name}:* `{improved_metrics[key]}` improved, `{regressed_metrics[key]}` regressed",
                },
            }
        )
    # Add support changes section.
    if fixed_errors or new_errors or new_configs:
        slack_blocks["blocks"].extend(
            [
                {"type": "divider"},
                {
                    "type": "section",
                    "text": {"type": "mrkdwn", "text": "π± *DEVICE SUPPORT CHANGES* π±"},
                },
            ]
        )
        if fixed_errors:
            slack_blocks["blocks"].append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": "*Successful Configurations That Override Previous Failures*",
                    },
                }
            )
            for model, device, os_version in sorted(fixed_errors):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"β’ {model} on {device} ({os_version})",
                        },
                    }
                )
        if new_errors:
            slack_blocks["blocks"].append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": "*Failed Configurations That Override Previous Successes*",
                    },
                }
            )
            for model, device, os_version in sorted(new_errors):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"β’ {model} on {device} ({os_version})",
                        },
                    }
                )
        if new_configs:
            slack_blocks["blocks"].append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": "*Newly Tested Configurations*",
                    },
                }
            )
            for model, device, os_version in sorted(new_configs):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"β’ {model} on {device} ({os_version})",
                        },
                    }
                )
    # Add alert if there was a significant decrease in device count.
    if needs_alert:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "β οΈ *ALERT:* Current device count is less than 90% of previous version's device count, test on more devices before updating the benchmark website!",
                },
            }
        )
    # Write both outputs to GITHUB_OUTPUT (no-op outside GitHub Actions).
    github_output = os.getenv("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a") as f:
            # Multiline value uses the heredoc syntax GitHub Actions expects.
            f.write("slack_message_payload<<EOF\n")
            json.dump(slack_blocks, f, indent=2)
            f.write("\nEOF\n")
            escaped_text = escape_string(performance_text)
            print(f"performance_message={escaped_text}", file=f)
# Script entry point: build the report and emit CI outputs when run directly.
if __name__ == "__main__":
    generate_report()