from huggingface_hub import HfApi, HfFileSystem, login
import pandas as pd

fs = HfFileSystem()

# Models whose CI results are kept by get_data(); matching against the
# result index is case-insensitive.
IMPORTANT_MODELS = [
    "auto",
    "bert",  # old but dominant (encoder only)
    "gpt2",  # old (decoder)
    "t5",  # old (encoder-decoder)
    "modernbert",  # (encoder only)
    "vit",  # old (vision)
    "clip",  # old but dominant (vision)
    "detr",  # object detection, segmentation (vision)
    "table-transformer",  # object detection (vision) - maybe just detr?
    "got_ocr2",  # ocr (vision)
    "whisper",  # old but dominant (audio)
    "wav2vec2",  # old (audio)
    "llama",  # new and dominant (meta)
    "gemma3",  # new (google)
    "qwen2",  # new (Alibaba)
    "mistral3",  # new (Mistral)
    "qwen2_5_vl",  # new (vision)
    "llava",  # many models from it (vision)
    "smolvlm",  # new (video)
    "internvl",  # new (video)
    "gemma3n",  # new (omnimodal models)
    "qwen2_5_omni",  # new (omnimodal models)
]

# Repository prefix carried by every path that fs.glob() returns for the
# NVIDIA dataset; it is removed to build the "raw/main" download URL.
_NVIDIA_REPO_PREFIX = "datasets/hf-internal-testing/transformers_daily_ci/"


def _strip_prefix(text, prefix):
    """Return *text* without a leading *prefix*; unchanged if it is absent."""
    # str.lstrip() would be wrong here: it strips any leading characters that
    # occur in its argument rather than removing the argument as a prefix.
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


def _add_failure_counts(df, vendor):
    """Add ``failed_multi_no_<vendor>`` / ``failed_single_no_<vendor>`` columns.

    Each new column holds ``len(failures[device])`` for the row's ``failures``
    mapping, or 0 when the mapping has no entry for that device.
    The frame is modified in place.
    """
    for device in ("multi", "single"):
        df[f"failed_{device}_no_{vendor}"] = df["failures"].apply(
            # Bind ``device`` as a default to avoid the late-binding closure trap.
            lambda failures, device=device: (
                len(failures[device]) if device in failures else 0
            )
        )


def get_data():
    """Load the AMD and NVIDIA model CI results and join them per model.

    For each vendor the ``model_results.json`` path that sorts last among the
    glob matches is read (this presumably selects the most recent run --
    it relies on the paths sorting chronologically; confirm against the
    dataset layout). Failure counts per device kind are added, both frames
    are outer-joined on the model name, a leading ``models_`` is removed from
    the index, and only rows whose name is in ``IMPORTANT_MODELS``
    (case-insensitively) are kept.

    Returns:
        pandas.DataFrame indexed by model name with the columns
        ``success_*``, ``failed_multi_no_*``, ``failed_single_no_*``,
        ``failures_*`` and ``job_link_*`` for ``amd`` and ``nvidia``.

    Raises:
        IndexError: if either glob matches no result file.
    """
    amd_files = fs.glob(
        "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
    )
    latest_amd = sorted(amd_files, reverse=True)[0]
    df_amd = pd.read_json(f"hf://{latest_amd}", orient="index")
    df_amd.index.name = "model_name"
    _add_failure_counts(df_amd, "amd")

    nvidia_files = fs.glob(
        "hf://datasets/hf-internal-testing/transformers_daily_ci/**/ci_results_run_models_gpu/model_results.json"
    )
    latest_nvidia = sorted(nvidia_files, reverse=True)[0]
    # The NVIDIA file is fetched over HTTPS, so the URL needs the path
    # relative to the repository root.
    nvidia_relative_path = _strip_prefix(latest_nvidia, _NVIDIA_REPO_PREFIX)
    df_nvidia = pd.read_json(
        f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/main/{nvidia_relative_path}",
        orient="index",
    )
    df_nvidia.index.name = "model_name"
    _add_failure_counts(df_nvidia, "nvidia")

    # Columns present in both frames (success, failures, job_link, ...) get
    # the vendor suffix; the failure-count columns are already vendor-specific.
    joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
    joined = joined[
        [
            "success_amd",
            "success_nvidia",
            "failed_multi_no_amd",
            "failed_multi_no_nvidia",
            "failed_single_no_amd",
            "failed_single_no_nvidia",
            "failures_amd",
            "failures_nvidia",
            "job_link_amd",
            "job_link_nvidia",
        ]
    ]

    joined.index = joined.index.str.replace("^models_", "", regex=True)

    important_models_lower = [model.lower() for model in IMPORTANT_MODELS]
    filtered_joined = joined[joined.index.str.lower().isin(important_models_lower)]
    return filtered_joined