"""
Comprehensive script to publish model and codebase to Hugging Face Hub
"""

import argparse
import os
import sys
from pathlib import Path

from huggingface_hub import HfApi, create_repo, upload_folder, upload_file
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Add parent directory to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Top-level files (relative to the repository root) uploaded with the code.
CODE_FILES = (
    "train.py",
    "inference.py",
    "config.yaml",
    "requirements.txt",
    "setup.py",
    "README.md",
    "MODEL_CARD.md",
    "LICENSE",
    ".gitignore",
)

# Directories (relative to the repository root) uploaded recursively.
CODE_DIRS = (
    "src",
    "scripts",
)

# Patterns are matched against each file's path relative to the uploaded
# folder, so the bare names only catch root-level entries; the wildcard
# variants also exclude caches and Finder metadata in nested directories.
IGNORE_PATTERNS = [
    "__pycache__",
    "__pycache__/*",
    "*/__pycache__/*",
    "*.pyc",
    ".DS_Store",
    "*/.DS_Store",
]


def _upload_model(model_path: str, repo_id: str) -> bool:
    """Push the model and tokenizer stored at ``model_path`` to ``repo_id``.

    Returns:
        True only when both the model and the tokenizer were pushed. A missing
        path or any error is reported on stdout and yields False.
    """
    try:
        if not os.path.exists(model_path):
            print(f"✗ Model path not found: {model_path}")
            print(" Skipping model upload. You can upload it later.")
            return False

        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        model.push_to_hub(repo_id)
        tokenizer.push_to_hub(repo_id)
    except Exception as e:
        # Best effort: the caller continues with the remaining steps.
        print(f"✗ Error uploading model: {e}")
        print(" You can upload the model separately later.")
        return False

    print("✓ Model and tokenizer uploaded")
    return True


def _upload_code(repo_id: str) -> int:
    """Upload the known code files and directories that exist in the repo root.

    Entries that do not exist locally are skipped silently; entries that fail
    to upload are reported and counted.

    Returns:
        The number of files/directories that failed to upload.
    """
    # Resolve first so a relative ``__file__`` still yields the real
    # repository root (the directory above ``scripts/``).
    repo_root = Path(__file__).resolve().parent.parent

    uploaded_count = 0
    failed_count = 0

    # Upload individual files
    for file_name in CODE_FILES:
        file_path = repo_root / file_name
        if not file_path.exists():
            continue
        try:
            upload_file(
                path_or_fileobj=str(file_path),
                path_in_repo=file_name,
                repo_id=repo_id,
                repo_type="model",
            )
        except Exception as e:
            print(f" ⚠ Could not upload {file_name}: {e}")
            failed_count += 1
        else:
            print(f" ✓ Uploaded {file_name}")
            uploaded_count += 1

    # Upload directories
    for dir_name in CODE_DIRS:
        dir_path = repo_root / dir_name
        if not dir_path.is_dir():
            continue
        try:
            upload_folder(
                folder_path=str(dir_path),
                path_in_repo=dir_name,
                repo_id=repo_id,
                repo_type="model",
                ignore_patterns=IGNORE_PATTERNS,
            )
        except Exception as e:
            print(f" ⚠ Could not upload {dir_name}/: {e}")
            failed_count += 1
        else:
            print(f" ✓ Uploaded {dir_name}/")
            uploaded_count += 1

    print(f"\n✓ Uploaded {uploaded_count} code files/directories")
    return failed_count


def _print_summary(repo_id: str, model_uploaded: bool, problems: list) -> None:
    """Print the final report, reflecting what was actually published."""
    if problems:
        print("\n[4/4] Publishing finished with problems:")
        for problem in problems:
            print(f" ⚠ {problem}")
    else:
        print("\n[4/4] Publishing complete!")

    print("\n" + "=" * 70)
    if problems:
        print("Finished with warnings ⚠")
    else:
        print("Success! 🎉")
    print("=" * 70)

    if problems:
        print("\nYour repository is available at:")
    else:
        print("\nYour model is now available at:")
    print(f"https://huggingface.co/{repo_id}")

    # Only advertise the usage snippet when the model really is on the Hub.
    if model_uploaded:
        print("\nTo use your model:")
        print(f"""
from transformers import pipeline

classifier = pipeline("text-classification", model="{repo_id}")

# Classify a comment
result = classifier("This function uses dynamic programming for O(n) time complexity")
print(result)
""")


def publish_to_hub(
    model_path: str,
    repo_id: str,
    private: bool = False,
    upload_code: bool = True,
    upload_model: bool = True
) -> bool:
    """
    Publish model and codebase to Hugging Face Hub.

    The repository is created (or verified) first. Model and code uploads are
    then attempted on a best-effort basis: a failure in one step is reported
    and the remaining steps still run. The final summary lists every step
    that did not succeed.

    Args:
        model_path: Path to the trained model
        repo_id: Full repository ID (e.g., "username/repo-name")
        private: Whether to make the repository private
        upload_code: Whether to upload code files
        upload_model: Whether to upload the model

    Returns:
        False when the repository could not be created or verified; True
        otherwise, including when individual uploads failed (those are
        reported in the summary rather than through the return value).
    """
    print("=" * 70)
    print("Publishing to Hugging Face Hub")
    print("=" * 70)
    print(f"\nRepository: {repo_id}")
    print(f"Private: {private}")
    print(f"Upload Model: {upload_model}")
    print(f"Upload Code: {upload_code}")

    # Create repository
    print("\n[1/4] Creating/verifying repository...")
    try:
        create_repo(
            repo_id=repo_id,
            repo_type="model",
            exist_ok=True,
            private=private
        )
        print(f"✓ Repository ready: {repo_id}")
    except Exception as e:
        print(f"✗ Error creating repository: {e}")
        print("\nMake sure you're logged in:")
        print(" huggingface-cli login")
        return False

    problems = []
    model_uploaded = False

    # Upload model and tokenizer
    if upload_model:
        print("\n[2/4] Uploading model and tokenizer...")
        model_uploaded = _upload_model(model_path, repo_id)
        if not model_uploaded:
            problems.append("Model and tokenizer were not uploaded")
    else:
        print("\n[2/4] Skipping model upload (--no-model flag)")

    # Upload code files
    if upload_code:
        print("\n[3/4] Uploading code files...")
        try:
            failed_count = _upload_code(repo_id)
        except Exception as e:
            print(f"✗ Error uploading code: {e}")
            problems.append("Code upload was aborted by an error")
        else:
            if failed_count:
                problems.append(
                    f"{failed_count} code files/directories could not be uploaded"
                )
    else:
        print("\n[3/4] Skipping code upload (--no-code flag)")

    # Final summary
    _print_summary(repo_id, model_uploaded, problems)

    return True


def main() -> None:
    """Parse the command line, ask for confirmation, and run the publish."""
    parser = argparse.ArgumentParser(
        description="Publish model and codebase to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Publish everything (model + code)
  python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier

  # Publish only code (no model)
  python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-model

  # Publish only model (no code)
  python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-code

  # Private repository
  python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --private
"""
    )

    parser.add_argument(
        "--model-path",
        type=str,
        default="./results/final_model",
        help="Path to the trained model"
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        default="Snaseem2026/code-comment-classifier",
        help="Full repository ID (e.g., 'username/repo-name')"
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Make the repository private"
    )
    parser.add_argument(
        "--no-code",
        action="store_true",
        help="Skip uploading code files"
    )
    parser.add_argument(
        "--no-model",
        action="store_true",
        help="Skip uploading model files"
    )
    parser.add_argument(
        "--yes",
        action="store_true",
        help="Skip confirmation prompt"
    )

    args = parser.parse_args()

    print("\n" + "=" * 70)
    print("Hugging Face Hub Publishing")
    print("=" * 70)
    print("\nBefore publishing, make sure you:")
    print("1. Have a Hugging Face account")
    print("2. Are logged in: huggingface-cli login")
    print("3. Have reviewed MODEL_CARD.md and README.md")
    print(f"4. Model path exists: {args.model_path} ({'✓' if os.path.exists(args.model_path) else '✗'})")

    if not args.yes:
        print("\n" + "=" * 70)
        try:
            response = input(f"\nProceed with publishing to {args.repo_id}? (yes/no): ")
        except EOFError:
            # stdin is closed (e.g. CI or a pipe): fail with a clear hint
            # instead of an unhandled traceback.
            print("\nNo input available. Re-run with --yes to skip the confirmation prompt.")
            sys.exit(1)
        if response.strip().lower() not in ('yes', 'y'):
            print("Publishing cancelled.")
            sys.exit(0)

    success = publish_to_hub(
        model_path=args.model_path,
        repo_id=args.repo_id,
        private=args.private,
        upload_code=not args.no_code,
        upload_model=not args.no_model
    )

    if not success:
        sys.exit(1)


if __name__ == "__main__":
    main()