|
|
""" |
|
|
Comprehensive script to publish model and codebase to Hugging Face Hub |
|
|
""" |
|
|
import argparse |
|
|
import os |
|
|
import sys |
|
|
from pathlib import Path |
|
|
from huggingface_hub import HfApi, create_repo, upload_folder, upload_file |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
|
|
|
|
|
# Put the parent of this script's directory at the front of the import path
# so modules living there can be imported when the script is run directly.
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)),
)
|
|
|
|
|
|
|
|
def _upload_model(model_path: str, repo_id: str) -> bool:
    """Push the model and tokenizer stored at ``model_path`` to ``repo_id``.

    Returns:
        True only when both pushes completed. A missing ``model_path`` or any
        error raised while loading/pushing is reported on stdout and yields
        False instead of propagating.
    """
    if not os.path.exists(model_path):
        print(f"✗ Model path not found: {model_path}")
        print(" Skipping model upload. You can upload it later.")
        return False

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)

        model.push_to_hub(repo_id)
        tokenizer.push_to_hub(repo_id)
    except Exception as e:
        # Best effort: a failed model upload must not abort the code upload.
        print(f"✗ Error uploading model: {e}")
        print(" You can upload the model separately later.")
        return False

    print("✓ Model and tokenizer uploaded")
    return True


def _upload_code(repo_id: str) -> list:
    """Upload the project's code files and directories to ``repo_id``.

    Entries that do not exist locally are skipped silently. Entries whose
    upload raises are reported on stdout and collected.

    Returns:
        Names of the files/directories whose upload failed (empty on success).
    """
    # resolve() first: with a relative __file__ such as "publish_to_hub.py",
    # Path(__file__).parent is "." and its parent is still ".", which would
    # make the current directory (not the repository root) the upload source.
    repo_root = Path(__file__).resolve().parent.parent

    code_files = [
        "train.py",
        "inference.py",
        "config.yaml",
        "requirements.txt",
        "setup.py",
        "README.md",
        "MODEL_CARD.md",
        "LICENSE",
        ".gitignore",
    ]
    code_dirs = [
        "src",
        "scripts",
    ]
    # upload_folder matches ignore patterns fnmatch-style against each file's
    # path relative to folder_path, so a bare "__pycache__" or ".DS_Store"
    # never matches nested entries; the leading wildcard covers every depth.
    ignore_patterns = ["*__pycache__/*", "*.pyc", "*.DS_Store"]

    uploaded_count = 0
    failed = []

    for file_name in code_files:
        file_path = repo_root / file_name
        if not file_path.exists():
            continue
        try:
            upload_file(
                path_or_fileobj=str(file_path),
                path_in_repo=file_name,
                repo_id=repo_id,
                repo_type="model",
            )
        except Exception as e:
            print(f" ⚠ Could not upload {file_name}: {e}")
            failed.append(file_name)
        else:
            print(f" ✓ Uploaded {file_name}")
            uploaded_count += 1

    for dir_name in code_dirs:
        dir_path = repo_root / dir_name
        if not dir_path.is_dir():
            continue
        try:
            upload_folder(
                folder_path=str(dir_path),
                path_in_repo=dir_name,
                repo_id=repo_id,
                repo_type="model",
                ignore_patterns=ignore_patterns,
            )
        except Exception as e:
            print(f" ⚠ Could not upload {dir_name}/: {e}")
            failed.append(f"{dir_name}/")
        else:
            print(f" ✓ Uploaded {dir_name}/")
            uploaded_count += 1

    print(f"\n✓ Uploaded {uploaded_count} code files/directories")
    return failed


def publish_to_hub(
    model_path: str,
    repo_id: str,
    private: bool = False,
    upload_code: bool = True,
    upload_model: bool = True
):
    """
    Publish model and codebase to Hugging Face Hub.

    The repository is created (or verified) first; the model and the code are
    then uploaded on a best-effort basis, with progress printed to stdout.

    Args:
        model_path: Path to the trained model
        repo_id: Full repository ID (e.g., "username/repo-name")
        private: Whether to make the repository private
        upload_code: Whether to upload code files
        upload_model: Whether to upload the model

    Returns:
        False if the repository could not be created or verified. True
        otherwise, including when individual uploads failed; those failures
        are listed in the final summary rather than changing the result.
    """
    print("=" * 70)
    print("Publishing to Hugging Face Hub")
    print("=" * 70)
    print(f"\nRepository: {repo_id}")
    print(f"Private: {private}")
    print(f"Upload Model: {upload_model}")
    print(f"Upload Code: {upload_code}")

    print("\n[1/4] Creating/verifying repository...")
    try:
        create_repo(
            repo_id=repo_id,
            repo_type="model",
            exist_ok=True,
            private=private,
        )
    except Exception as e:
        print(f"✗ Error creating repository: {e}")
        print("\nMake sure you're logged in:")
        print(" huggingface-cli login")
        return False
    print(f"✓ Repository ready: {repo_id}")

    # Everything that was requested but did not reach the Hub.
    problems = []

    model_uploaded = False
    if upload_model:
        print("\n[2/4] Uploading model and tokenizer...")
        model_uploaded = _upload_model(model_path, repo_id)
        if not model_uploaded:
            problems.append("model and tokenizer")
    else:
        print("\n[2/4] Skipping model upload (--no-model flag)")

    if upload_code:
        print("\n[3/4] Uploading code files...")
        try:
            problems.extend(_upload_code(repo_id))
        except Exception as e:
            print(f"✗ Error uploading code: {e}")
            problems.append("code files")
    else:
        print("\n[3/4] Skipping code upload (--no-code flag)")

    print("\n[4/4] Publishing complete!")
    print("\n" + "=" * 70)
    if problems:
        # Do not claim success when part of the requested upload is missing.
        print("Completed with warnings ⚠")
        print("Not uploaded: " + ", ".join(problems))
    else:
        print("Success! 🎉")
    print("=" * 70)
    print("\nYour model is now available at:")
    print(f"https://huggingface.co/{repo_id}")

    # The usage snippet only works once the model is actually on the Hub.
    if model_uploaded:
        print("\nTo use your model:")
        print(
            "\n"
            "from transformers import pipeline\n"
            "\n"
            f'classifier = pipeline("text-classification", model="{repo_id}")\n'
            "\n"
            "# Classify a comment\n"
            'result = classifier("This function uses dynamic programming for O(n) time complexity")\n'
            "print(result)\n"
        )

    return True
|
|
|
|
|
|
|
|
# Shown verbatim after the option list (RawDescriptionHelpFormatter).
_EPILOG = """
Examples:
# Publish everything (model + code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier

# Publish only code (no model)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-model

# Publish only model (no code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-code

# Private repository
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --private
"""


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when ``argv`` is None)."""
    parser = argparse.ArgumentParser(
        description="Publish model and codebase to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )
    parser.add_argument(
        "--model-path",
        type=str,
        default="./results/final_model",
        help="Path to the trained model",
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        default="Snaseem2026/code-comment-classifier",
        help="Full repository ID (e.g., 'username/repo-name')",
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Make the repository private",
    )
    parser.add_argument(
        "--no-code",
        action="store_true",
        help="Skip uploading code files",
    )
    parser.add_argument(
        "--no-model",
        action="store_true",
        help="Skip uploading model files",
    )
    parser.add_argument(
        "--yes",
        action="store_true",
        help="Skip confirmation prompt",
    )
    return parser.parse_args(argv)


def main(argv=None):
    """Run the publishing workflow from the command line.

    Prints a pre-flight checklist, asks for confirmation unless ``--yes`` was
    given, then calls ``publish_to_hub`` with the parsed options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 when publishing succeeded or the user declined,
        1 when publishing failed or no answer could be read from stdin.
    """
    args = _parse_args(argv)

    print("\n" + "=" * 70)
    print("Hugging Face Hub Publishing")
    print("=" * 70)
    print("\nBefore publishing, make sure you:")
    print("1. Have a Hugging Face account")
    print("2. Are logged in: huggingface-cli login")
    print("3. Have reviewed MODEL_CARD.md and README.md")
    print(f"4. Model path exists: {args.model_path} ({'✓' if os.path.exists(args.model_path) else '✗'})")

    if not args.yes:
        print("\n" + "=" * 70)
        try:
            response = input(f"\nProceed with publishing to {args.repo_id}? (yes/no): ")
        except EOFError:
            # stdin is closed or exhausted (e.g. CI, piped input): fail with a
            # clear hint instead of a traceback.
            print("\nNo input available for the confirmation prompt; re-run with --yes.")
            return 1
        # strip() so that answers such as "yes " are not treated as a refusal.
        if response.strip().lower() not in ("yes", "y"):
            print("Publishing cancelled.")
            return 0

    success = publish_to_hub(
        model_path=args.model_path,
        repo_id=args.repo_id,
        private=args.private,
        upload_code=not args.no_code,
        upload_model=not args.no_model,
    )
    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())
|
|
|