code-comment-classifier / scripts /publish_to_hub.py

Snaseem2026

Upload folder using huggingface_hub

7762e8f verified 5 days ago

8.06 kB

	"""
	Comprehensive script to publish model and codebase to Hugging Face Hub
	"""
	import argparse
	import os
	import sys
	from pathlib import Path
	from huggingface_hub import HfApi, create_repo, upload_folder, upload_file
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# Add parent directory to path
	sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))


	def publish_to_hub(
	    model_path: str,
	    repo_id: str,
	    private: bool = False,
	    upload_code: bool = True,
	    upload_model: bool = True
	) -> bool:
	    """
	    Publish model and codebase to Hugging Face Hub.

	    The repository is created (or verified) first. The model and the code
	    are then uploaded as independent steps, so a failure in one step does
	    not prevent the other from being attempted.

	    Args:
	        model_path: Path to the trained model
	        repo_id: Full repository ID (e.g., "username/repo-name")
	        private: Whether to make the repository private
	        upload_code: Whether to upload code files
	        upload_model: Whether to upload the model

	    Returns:
	        True when the repository is ready and every requested upload step
	        completed. False when the repository could not be created or when
	        a requested upload step failed (the remaining steps are still run).
	    """
	    print("=" * 70)
	    print("Publishing to Hugging Face Hub")
	    print("=" * 70)
	    print(f"\nRepository: {repo_id}")
	    print(f"Private: {private}")
	    print(f"Upload Model: {upload_model}")
	    print(f"Upload Code: {upload_code}")

	    # Create repository; nothing else can work without it, so bail out early
	    print("\n[1/4] Creating/verifying repository...")
	    try:
	        create_repo(
	            repo_id=repo_id,
	            repo_type="model",
	            exist_ok=True,
	            private=private
	        )
	    except Exception as e:
	        print(f"✗ Error creating repository: {e}")
	        print("\nMake sure you're logged in:")
	        print(" huggingface-cli login")
	        return False
	    else:
	        print(f"✓ Repository ready: {repo_id}")

	    # Names of the requested steps that did not complete
	    failed_steps = []

	    # Upload model and tokenizer
	    if upload_model:
	        print("\n[2/4] Uploading model and tokenizer...")
	        if not _upload_model(model_path, repo_id):
	            failed_steps.append("model")
	    else:
	        print("\n[2/4] Skipping model upload (--no-model flag)")

	    # Upload code files
	    if upload_code:
	        print("\n[3/4] Uploading code files...")
	        if not _upload_code(repo_id):
	            failed_steps.append("code")
	    else:
	        print("\n[3/4] Skipping code upload (--no-code flag)")

	    # Final summary: only claim success when every requested step succeeded
	    if failed_steps:
	        print("\n[4/4] Publishing finished with errors")
	        print("\n" + "=" * 70)
	        print(f"Not uploaded: {', '.join(failed_steps)}")
	        print("=" * 70)
	        print(f"\nRepository: https://huggingface.co/{repo_id}")
	        return False

	    print("\n[4/4] Publishing complete!")
	    print("\n" + "=" * 70)
	    print("Success! 🎉")
	    print("=" * 70)
	    print("\nYour model is now available at:")
	    print(f"https://huggingface.co/{repo_id}")

	    if upload_model:
	        print("\nTo use your model:")
	        print(f"""
	from transformers import pipeline

	classifier = pipeline("text-classification", model="{repo_id}")

	# Classify a comment
	result = classifier("This function uses dynamic programming for O(n) time complexity")
	print(result)
	""")

	    return True


	def _upload_model(model_path: str, repo_id: str) -> bool:
	    """Load the model and tokenizer from model_path and push both to repo_id.

	    Returns:
	        True when both were pushed; False when model_path does not exist or
	        loading/pushing raised.
	    """
	    try:
	        if not os.path.exists(model_path):
	            print(f"✗ Model path not found: {model_path}")
	            print(" Skipping model upload. You can upload it later.")
	            return False

	        tokenizer = AutoTokenizer.from_pretrained(model_path)
	        model = AutoModelForSequenceClassification.from_pretrained(model_path)

	        model.push_to_hub(repo_id)
	        tokenizer.push_to_hub(repo_id)
	    except Exception as e:
	        print(f"✗ Error uploading model: {e}")
	        print(" You can upload the model separately later.")
	        return False

	    print("✓ Model and tokenizer uploaded")
	    return True


	def _upload_code(repo_id: str) -> bool:
	    """Upload the project's code files and directories to repo_id.

	    Entries that do not exist locally are skipped silently. An entry that
	    exists but cannot be uploaded is reported, and the remaining entries
	    are still attempted.

	    Returns:
	        True when every entry found on disk was uploaded, False otherwise.
	    """
	    try:
	        # Project root is the parent of the directory holding this script
	        repo_root = Path(__file__).parent.parent

	        # Files to upload
	        code_files = [
	            "train.py",
	            "inference.py",
	            "config.yaml",
	            "requirements.txt",
	            "setup.py",
	            "README.md",
	            "MODEL_CARD.md",
	            "LICENSE",
	            ".gitignore"
	        ]

	        # Directories to upload
	        code_dirs = [
	            "src",
	            "scripts"
	        ]

	        uploaded_count = 0
	        failed_count = 0

	        # Upload individual files
	        for file_name in code_files:
	            file_path = repo_root / file_name
	            if not file_path.exists():
	                continue
	            try:
	                upload_file(
	                    path_or_fileobj=str(file_path),
	                    path_in_repo=file_name,
	                    repo_id=repo_id,
	                    repo_type="model"
	                )
	            except Exception as e:
	                print(f" ⚠ Could not upload {file_name}: {e}")
	                failed_count += 1
	            else:
	                print(f" ✓ Uploaded {file_name}")
	                uploaded_count += 1

	        # Upload directories (is_dir() is False for missing paths too)
	        for dir_name in code_dirs:
	            dir_path = repo_root / dir_name
	            if not dir_path.is_dir():
	                continue
	            try:
	                upload_folder(
	                    folder_path=str(dir_path),
	                    path_in_repo=dir_name,
	                    repo_id=repo_id,
	                    repo_type="model",
	                    ignore_patterns=["__pycache__", "*.pyc", ".DS_Store"]
	                )
	            except Exception as e:
	                print(f" ⚠ Could not upload {dir_name}/: {e}")
	                failed_count += 1
	            else:
	                print(f" ✓ Uploaded {dir_name}/")
	                uploaded_count += 1

	        print(f"\n✓ Uploaded {uploaded_count} code files/directories")
	        if failed_count:
	            print(f"⚠ {failed_count} code files/directories could not be uploaded")
	    except Exception as e:
	        print(f"✗ Error uploading code: {e}")
	        return False

	    return failed_count == 0


	if __name__ == "__main__":
	    # Command-line entry point: parse the flags, print a pre-flight
	    # checklist, ask for confirmation (unless --yes), then publish.
	    parser = argparse.ArgumentParser(
	        description="Publish model and codebase to Hugging Face Hub",
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog="""
	Examples:
	# Publish everything (model + code)
	python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier

	# Publish only code (no model)
	python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-model

	# Publish only model (no code)
	python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-code

	# Private repository
	python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --private
	"""
	    )
	    parser.add_argument(
	        "--model-path",
	        type=str,
	        default="./results/final_model",
	        help="Path to the trained model"
	    )
	    parser.add_argument(
	        "--repo-id",
	        type=str,
	        default="Snaseem2026/code-comment-classifier",
	        help="Full repository ID (e.g., 'username/repo-name')"
	    )
	    parser.add_argument(
	        "--private",
	        action="store_true",
	        help="Make the repository private"
	    )
	    parser.add_argument(
	        "--no-code",
	        action="store_true",
	        help="Skip uploading code files"
	    )
	    parser.add_argument(
	        "--no-model",
	        action="store_true",
	        help="Skip uploading model files"
	    )
	    parser.add_argument(
	        "--yes",
	        action="store_true",
	        help="Skip confirmation prompt"
	    )

	    args = parser.parse_args()

	    # Pre-flight checklist, including whether the model path is present
	    print("\n" + "=" * 70)
	    print("Hugging Face Hub Publishing")
	    print("=" * 70)
	    print("\nBefore publishing, make sure you:")
	    print("1. Have a Hugging Face account")
	    print("2. Are logged in: huggingface-cli login")
	    print("3. Have reviewed MODEL_CARD.md and README.md")
	    print(f"4. Model path exists: {args.model_path} ({'✓' if os.path.exists(args.model_path) else '✗'})")

	    if not args.yes:
	        print("\n" + "=" * 70)
	        try:
	            response = input(f"\nProceed with publishing to {args.repo_id}? (yes/no): ")
	        except EOFError:
	            # stdin is closed or not interactive (CI, pipes): there is no
	            # answer to read, so fail with a hint instead of a traceback.
	            print("\nNo input available; re-run with --yes to skip the confirmation prompt.")
	            sys.exit(1)
	        # Tolerate surrounding whitespace and any letter case in the answer
	        if response.strip().lower() not in ['yes', 'y']:
	            print("Publishing cancelled.")
	            sys.exit(0)

	    success = publish_to_hub(
	        model_path=args.model_path,
	        repo_id=args.repo_id,
	        private=args.private,
	        upload_code=not args.no_code,
	        upload_model=not args.no_model
	    )

	    # Propagate failure to the shell via the exit status
	    if not success:
	        sys.exit(1)