"""
Utility functions for handling Gemma models
"""

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from huggingface_hub import login

def get_available_models():
    """
    Returns a list of available Gemma models for fine-tuning.
    """
    return [
        "google/gemma-2-2b-it",
        "google/gemma-2-9b-it",
        "google/gemma-2-27b-it"
    ]

def load_model(model_name, token=None):
    """
    Loads a model from Hugging Face Hub.
    
    Args:
        model_name: Name of the model to load
        token: Hugging Face token for access to gated models
        
    Returns:
        Tuple of (model, tokenizer)
    """
    if token:
        login(token)
        
    # Set appropriate device
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"  # For Apple Silicon
    else:
        device = "cpu"
        
    print(f"Loading model {model_name} on {device}...")
    
    # Load the model with parameters appropriate for the device and model size.
    # Assumes names like "google/gemma-2-9b-it" or "google/gemma-7b-it", where
    # the second-to-last dash-separated token is the parameter count.
    model_size = model_name.split("-")[-2]
    if device == "cuda":
        # For CUDA devices, optimize based on model size and available memory
        if model_size in ["2b", "7b"]:
            # Smaller models can be loaded in bfloat16
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.bfloat16,
                device_map="auto"
            )
        else:
            # Larger models (9B/27B) are loaded with 8-bit quantization via
            # bitsandbytes to reduce GPU memory usage
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                quantization_config=BitsAndBytesConfig(load_in_8bit=True)
            )
    elif device == "cpu":
        # bitsandbytes 8-bit loading requires a GPU, so on CPU all models are
        # loaded in full precision; low_cpu_mem_usage limits peak RAM during loading
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map={"": device},
            low_cpu_mem_usage=True
        )
    else:  # MPS (Apple Silicon)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map={"": device}
        )
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    return model, tokenizer
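
# The helper below is an illustrative, minimal smoke test for load_model: it
# loads a model, runs a single short generation, and prints the decoded output.
# The function name, prompt, and max_new_tokens value are arbitrary examples
# for this sketch, not part of the module's required API.
def quick_generate_check(model_name, token=None, prompt="Hello, Gemma!"):
    """
    Loads a model with load_model and generates a short completion.
    Intended only as a sanity check that the model loaded correctly.
    """
    model, tokenizer = load_model(model_name, token=token)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=32)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)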

def export_model(model_path, output_dir, model_name, format="pytorch", quantization=None):
    """
    Exports a fine-tuned model to the specified format.
    
    Args:
        model_path: Path to the fine-tuned model
        output_dir: Directory to save the exported model
        model_name: Name for the exported model
        format: Export format ("pytorch", "safetensors", or "gguf"; GGUF
            conversion requires external tooling such as llama.cpp)
        quantization: Quantization level for GGUF exports (e.g. "Q4_K_M"), or None
        
    Returns:
        Dictionary with export information
    """
    if not os.path.exists(model_path):
        raise ValueError(f"Model path '{model_path}' does not exist")
    
    os.makedirs(output_dir, exist_ok=True)
    export_path = os.path.join(output_dir, model_name)
    os.makedirs(export_path, exist_ok=True)
    
    # Load the fine-tuned model and tokenizer. Any LoRA adapter weights are
    # assumed to have been merged already (see the merge sketch at the end of
    # this module).
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    # Handle different export formats
    if format.lower() == "pytorch":
        # Export as PyTorch model
        model.save_pretrained(export_path)
        tokenizer.save_pretrained(export_path)
    
    elif format.lower() == "safetensors":
        # Export as safetensors
        model.save_pretrained(export_path, safe_serialization=True)
        tokenizer.save_pretrained(export_path)
    
    elif format.lower() == "gguf":
        # GGUF conversion is not handled by transformers itself; it is normally
        # done with an external tool such as llama.cpp's convert_hf_to_gguf.py,
        # followed by llama-quantize for quantized variants. See the sketch
        # after this function for one possible approach.
        raise NotImplementedError(
            "GGUF export requires an external conversion tool (e.g. llama.cpp); "
            f"requested quantization: {quantization}"
        )
    
    else:
        raise ValueError(f"Unsupported export format: {format}")
    
    # Calculate model size
    model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    model_size_gb = model_size_bytes / (1024**3)
    
    return {
        "format": format.lower(),
        "quantization": quantization if format.lower() == "gguf" else "None",
        "model_name": model_name,
        "export_path": export_path,
        "model_size": f"{model_size_gb:.2f} GB"
    }
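
# Sketch of how the GGUF branch in export_model could be wired up, assuming a
# local llama.cpp checkout. The script name (convert_hf_to_gguf.py), the
# llama-quantize binary, and their flags vary between llama.cpp versions, so
# treat the exact command lines below as assumptions to verify against your
# checkout rather than a definitive recipe.
def convert_to_gguf_sketch(model_path, output_file, llama_cpp_dir, quantization=None):
    """
    Converts a Hugging Face model directory to GGUF using llama.cpp tooling.
    Illustrative only; paths and flags depend on the llama.cpp version.
    """
    import subprocess

    convert_script = os.path.join(llama_cpp_dir, "convert_hf_to_gguf.py")
    # Step 1: convert the HF checkpoint to an unquantized GGUF file.
    subprocess.run(
        ["python", convert_script, model_path, "--outfile", output_file],
        check=True
    )
    # Step 2: optionally quantize the GGUF file with llama-quantize.
    if quantization and quantization.lower() != "none":
        quantized_file = output_file.replace(".gguf", f".{quantization}.gguf")
        quantize_bin = os.path.join(llama_cpp_dir, "llama-quantize")
        subprocess.run(
            [quantize_bin, output_file, quantized_file, quantization],
            check=True
        )
        return quantized_file
    return output_file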

def push_to_hub(model_path, repo_name, token):
    """
    Pushes a fine-tuned model to Hugging Face Hub.
    
    Args:
        model_path: Path to the fine-tuned model
        repo_name: Name for the repository on Hugging Face Hub
        token: Hugging Face token
        
    Returns:
        URL of the uploaded model
    """
    if not os.path.exists(model_path):
        raise ValueError(f"Model path '{model_path}' does not exist")
    
    login(token)
    
    # Load the fine-tuned model and tokenizer (LoRA weights, if any, are
    # assumed to have been merged already; see the merge sketch at the end of
    # this module).
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    # Push to hub
    model.push_to_hub(repo_name)
    tokenizer.push_to_hub(repo_name)
    
    # Construct the model URL on the Hub
    model_url = f"https://huggingface.co/{repo_name}"
    
    return model_url
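
# Both export_model and push_to_hub assume that any LoRA adapter weights have
# already been merged into the base model. The helper below sketches one way
# to do that merge with the peft library before exporting or uploading; the
# function name and the reliance on adapter_config.json being present in
# model_path are assumptions for illustration, not part of this module's API.
def merge_lora_weights(model_path):
    """
    Loads a LoRA adapter from model_path, merges it into its base model, and
    returns the merged model. If model_path is not a LoRA adapter, the model
    is returned as-is.
    """
    from peft import PeftConfig, PeftModel

    adapter_config = os.path.join(model_path, "adapter_config.json")
    if not os.path.exists(adapter_config):
        # Not a LoRA checkpoint; load it directly.
        return AutoModelForCausalLM.from_pretrained(model_path)

    config = PeftConfig.from_pretrained(model_path)
    base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(base_model, model_path)
    # merge_and_unload folds the adapter weights into the base model so the
    # result can be saved or pushed like an ordinary transformers model.
    return model.merge_and_unload()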