"""
Model definition and utilities
"""
import logging
from typing import Dict, Optional

import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModelForSequenceClassification

def create_model(
    model_name: str,
    num_labels: int,
    label2id: Dict[str, int],
    id2label: Dict[int, str],
    dropout: Optional[float] = None
):
    """
    Create a sequence classification model with optional dropout configuration.

    Args:
        model_name: Name of the pretrained model
        num_labels: Number of classification labels
        label2id: Mapping from label names to IDs
        id2label: Mapping from IDs to label names
        dropout: Optional dropout probability for the classifier head

    Returns:
        Initialized model
    """
    config = AutoConfig.from_pretrained(
        model_name,
        num_labels=num_labels,
        label2id=label2id,
        id2label=id2label
    )

    # Set dropout if provided
    if dropout is not None:
        if hasattr(config, 'hidden_dropout_prob'):
            config.hidden_dropout_prob = dropout
        if hasattr(config, 'attention_probs_dropout_prob'):
            config.attention_probs_dropout_prob = dropout
        if hasattr(config, 'classifier_dropout'):
            config.classifier_dropout = dropout
        logging.info(f"Set model dropout to {dropout}")

    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config
    )
    return model

def apply_class_weights(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> Optional[torch.Tensor]:
    """
    Prepare a class-weight tensor for weighting the model's loss function.

    Args:
        model: The model the weights are intended for
        class_weights: List of weights for each class (must match num_labels)

    Returns:
        Tensor of class weights if class_weights is provided, otherwise None.
        The model itself is not modified; the tensor must be passed to a
        custom Trainer that computes a weighted loss.
    """
    if class_weights is not None:
        weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
        # Note: using these weights requires a custom Trainer with weighted loss
        logging.info(f"Class weights prepared: {class_weights}")
        return weights_tensor
    return None
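
# --- Illustrative sketch (not part of the original module's API) ---
# apply_class_weights only returns a weights tensor; to actually use it, the
# weights have to be wired into the loss via a custom Trainer. The class below
# is one hedged way to do that; the compute_loss signature follows recent
# transformers releases and may need adjusting for other versions.
from transformers import Trainer  # assumed available alongside the imports above


class WeightedLossTrainer(Trainer):
    """Trainer subclass that applies class weights in the cross-entropy loss."""

    def __init__(self, *args, class_weights: Optional[torch.Tensor] = None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        # Move the weights to the same device as the logits before building the loss
        weight = (
            self.class_weights.to(logits.device)
            if self.class_weights is not None
            else None
        )
        loss_fct = nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        return (loss, outputs) if return_outputs else loss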

def get_model_size(model: nn.Module) -> float:
    """
    Calculate model size in millions of parameters.

    Args:
        model: PyTorch model

    Returns:
        Number of parameters in millions
    """
    param_size = sum(p.numel() for p in model.parameters())
    return param_size / 1e6

def get_trainable_params(model: nn.Module) -> Dict[str, int]:
    """
    Get counts of trainable and non-trainable parameters.

    Args:
        model: PyTorch model

    Returns:
        Dictionary with 'trainable', 'total' and 'non_trainable' parameter counts
    """
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return {
        'trainable': trainable,
        'total': total,
        'non_trainable': total - trainable
    }
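
# --- Illustrative usage (assumption): a quick smoke test of the helpers above.
# "distilbert-base-uncased" and the example labels are placeholder choices,
# not values mandated by this module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    labels = ["negative", "positive"]
    label2id = {name: i for i, name in enumerate(labels)}
    id2label = {i: name for i, name in enumerate(labels)}

    model = create_model(
        "distilbert-base-uncased",
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label,
        dropout=0.2,
    )
    class_weights = apply_class_weights(model, class_weights=[1.0, 2.0])

    print(f"Model size: {get_model_size(model):.1f}M parameters")
    print(f"Parameter counts: {get_trainable_params(model)}")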