| import argparse | |
| import os | |
| import yaml | |
| from huggingface_hub import hf_hub_download, list_repo_files | |
| def load_config(config_path): | |
| with open(config_path, 'r') as file: | |
| return yaml.safe_load(file) | |
| def download_model(model_config): | |
| model_id = model_config["model_id"] | |
| local_dir = model_config["local_dir"] | |
| if local_dir is None: | |
| print(f"Skipping download for {model_id}: local_dir is null") | |
| return | |
| os.makedirs(local_dir, exist_ok=True) | |
| allow_patterns = model_config.get("allow", []) | |
| deny_patterns = model_config.get("deny", []) | |
| if allow_patterns: | |
| for file in allow_patterns: | |
| hf_hub_download( | |
| repo_id=model_id, | |
| filename=file, | |
| local_dir=local_dir, | |
| local_dir_use_symlinks=False | |
| ) | |
| else: | |
| print(f"No allow patterns specified for {model_id}. Attempting to download all files except those in deny list.") | |
| repo_files = list_repo_files(repo_id=model_id) | |
| for file in repo_files: | |
| if not any(deny_pattern in file for deny_pattern in deny_patterns): | |
| hf_hub_download( | |
| repo_id=model_id, | |
| filename=file, | |
| local_dir=local_dir, | |
| local_dir_use_symlinks=False | |
| ) | |
| if __name__ == "__main__": | |
| parser = argparse.ArgumentParser(description="Download model checkpoints from Hugging Face Hub") | |
| parser.add_argument( | |
| "--config_path", | |
| type=str, | |
| default="configs/model_ckpts.yaml", | |
| help="Path to the configuration YAML file" | |
| ) | |
| args = parser.parse_args() | |
| config = load_config(args.config_path) | |
| for model_config in config: | |
| print(f"Processing {model_config['model_id']} (local_dir: {model_config['local_dir']})") | |
| download_model(model_config) |