code-comment-classifier / scripts /publish_to_hub.py
Snaseem2026's picture
Upload folder using huggingface_hub
7762e8f verified
"""
Comprehensive script to publish the model and codebase to the Hugging Face Hub.
"""
import argparse
import os
import sys
from pathlib import Path
from huggingface_hub import HfApi, create_repo, upload_folder, upload_file
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Add parent directory to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
def publish_to_hub(
    model_path: str,
    repo_id: str,
    private: bool = False,
    upload_code: bool = True,
    upload_model: bool = True
) -> bool:
    """
    Publish model and codebase to Hugging Face Hub.

    The repository is created (or reused) first. The model/tokenizer upload
    and the code upload are then attempted on a best-effort basis: a failure
    in either is printed and recorded, but does not abort the run. The final
    summary lists every step that did not complete instead of unconditionally
    reporting success.

    Args:
        model_path: Path to the trained model
        repo_id: Full repository ID (e.g., "username/repo-name")
        private: Whether to make the repository private
        upload_code: Whether to upload code files
        upload_model: Whether to upload the model

    Returns:
        False if the repository could not be created or verified. True
        otherwise, including when individual uploads failed (those are
        listed in the final summary).
    """
    print("=" * 70)
    print("Publishing to Hugging Face Hub")
    print("=" * 70)
    print(f"\nRepository: {repo_id}")
    print(f"Private: {private}")
    print(f"Upload Model: {upload_model}")
    print(f"Upload Code: {upload_code}")

    # Steps that failed after the repository was ready; drives the summary.
    problems = []
    # Only set once both the model and the tokenizer were pushed.
    model_uploaded = False

    # Create repository
    print("\n[1/4] Creating/verifying repository...")
    try:
        create_repo(
            repo_id=repo_id,
            repo_type="model",
            exist_ok=True,
            private=private
        )
        print(f"✓ Repository ready: {repo_id}")
    except Exception as e:
        # Without a repository nothing else can be uploaded, so stop here.
        print(f"✗ Error creating repository: {e}")
        print("\nMake sure you're logged in:")
        print(" huggingface-cli login")
        return False

    # Upload model and tokenizer
    if upload_model:
        print("\n[2/4] Uploading model and tokenizer...")
        try:
            if not os.path.exists(model_path):
                print(f"✗ Model path not found: {model_path}")
                print(" Skipping model upload. You can upload it later.")
                problems.append(f"model not uploaded (path not found: {model_path})")
            else:
                tokenizer = AutoTokenizer.from_pretrained(model_path)
                model = AutoModelForSequenceClassification.from_pretrained(model_path)
                model.push_to_hub(repo_id)
                tokenizer.push_to_hub(repo_id)
                model_uploaded = True
                print("✓ Model and tokenizer uploaded")
        except Exception as e:
            # Best-effort: keep going so the code upload can still happen.
            print(f"✗ Error uploading model: {e}")
            print(" You can upload the model separately later.")
            problems.append(f"model not uploaded ({e})")
    else:
        print("\n[2/4] Skipping model upload (--no-model flag)")

    # Upload code files
    if upload_code:
        print("\n[3/4] Uploading code files...")
        try:
            repo_root = Path(__file__).parent.parent
            # Files to upload (those that do not exist locally are skipped)
            code_files = [
                "train.py",
                "inference.py",
                "config.yaml",
                "requirements.txt",
                "setup.py",
                "README.md",
                "MODEL_CARD.md",
                "LICENSE",
                ".gitignore"
            ]
            # Directories to upload
            code_dirs = [
                "src",
                "scripts"
            ]
            # Ignore patterns are glob-matched against each file's path
            # relative to the uploaded folder, so bare names such as
            # "__pycache__" or ".DS_Store" would miss nested occurrences.
            ignore_patterns = [
                "__pycache__/*",
                "*/__pycache__/*",
                "*.pyc",
                ".DS_Store",
                "*/.DS_Store",
            ]
            uploaded_count = 0

            # Upload individual files
            for file_name in code_files:
                file_path = repo_root / file_name
                if not file_path.exists():
                    continue
                try:
                    upload_file(
                        path_or_fileobj=str(file_path),
                        path_in_repo=file_name,
                        repo_id=repo_id,
                        repo_type="model"
                    )
                    print(f" ✓ Uploaded {file_name}")
                    uploaded_count += 1
                except Exception as e:
                    print(f" ⚠ Could not upload {file_name}: {e}")
                    problems.append(f"{file_name} not uploaded ({e})")

            # Upload directories
            for dir_name in code_dirs:
                dir_path = repo_root / dir_name
                if not (dir_path.exists() and dir_path.is_dir()):
                    continue
                try:
                    upload_folder(
                        folder_path=str(dir_path),
                        path_in_repo=dir_name,
                        repo_id=repo_id,
                        repo_type="model",
                        ignore_patterns=ignore_patterns
                    )
                    print(f" ✓ Uploaded {dir_name}/")
                    uploaded_count += 1
                except Exception as e:
                    print(f" ⚠ Could not upload {dir_name}/: {e}")
                    problems.append(f"{dir_name}/ not uploaded ({e})")

            print(f"\n✓ Uploaded {uploaded_count} code files/directories")
        except Exception as e:
            print(f"✗ Error uploading code: {e}")
            problems.append(f"code upload aborted ({e})")
    else:
        print("\n[3/4] Skipping code upload (--no-code flag)")

    # Final summary
    print("\n[4/4] Publishing complete!")
    print("\n" + "=" * 70)
    if problems:
        print("Finished with warnings ⚠")
    else:
        print("Success! 🎉")
    print("=" * 70)

    if problems:
        print("\nThe following steps did not complete:")
        for problem in problems:
            print(f" - {problem}")

    if model_uploaded:
        print("\nYour model is now available at:")
    else:
        print("\nYour repository is available at:")
    print(f"https://huggingface.co/{repo_id}")

    # Only advertise the usage snippet when the model is really on the Hub.
    if model_uploaded:
        print("\nTo use your model:")
        print(f"""
from transformers import pipeline
classifier = pipeline("text-classification", model="{repo_id}")
# Classify a comment
result = classifier("This function uses dynamic programming for O(n) time complexity")
print(result)
""")
    return True
if __name__ == "__main__":
    # Command-line entry point: parse options, show a pre-flight checklist,
    # ask for confirmation (unless --yes), then publish.
    usage_examples = """
Examples:
# Publish everything (model + code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier
# Publish only code (no model)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-model
# Publish only model (no code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-code
# Private repository
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --private
"""
    cli = argparse.ArgumentParser(
        description="Publish model and codebase to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Options that carry a string value: (flag, default, help text).
    valued_options = (
        ("--model-path", "./results/final_model", "Path to the trained model"),
        (
            "--repo-id",
            "Snaseem2026/code-comment-classifier",
            "Full repository ID (e.g., 'username/repo-name')",
        ),
    )
    for flag, fallback, description in valued_options:
        cli.add_argument(flag, type=str, default=fallback, help=description)

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        ("--private", "Make the repository private"),
        ("--no-code", "Skip uploading code files"),
        ("--no-model", "Skip uploading model files"),
        ("--yes", "Skip confirmation prompt"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    options = cli.parse_args()

    rule = "=" * 70
    print("\n" + rule)
    print("Hugging Face Hub Publishing")
    print(rule)

    # Pre-flight checklist; the last item reflects whether the path exists now.
    path_mark = '✓' if os.path.exists(options.model_path) else '✗'
    print("\nBefore publishing, make sure you:")
    print("1. Have a Hugging Face account")
    print("2. Are logged in: huggingface-cli login")
    print("3. Have reviewed MODEL_CARD.md and README.md")
    print(f"4. Model path exists: {options.model_path} ({path_mark})")

    if not options.yes:
        print("\n" + rule)
        answer = input(f"\nProceed with publishing to {options.repo_id}? (yes/no): ")
        confirmed = answer.lower() in ("yes", "y")
        if not confirmed:
            print("Publishing cancelled.")
            sys.exit(0)

    published = publish_to_hub(
        model_path=options.model_path,
        repo_id=options.repo_id,
        private=options.private,
        upload_code=not options.no_code,
        upload_model=not options.no_model,
    )
    # A falsy result from publish_to_hub becomes a non-zero exit status.
    if not published:
        sys.exit(1)