"""
Model definition and utilities
"""
import logging
from typing import Dict, Optional

import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModelForSequenceClassification


def create_model(
    model_name: str,
    num_labels: int,
    label2id: Dict[str, int],
    id2label: Dict[int, str],
    dropout: Optional[float] = None
) -> nn.Module:
    """
    Create a sequence classification model with optional dropout configuration.
    
    Args:
        model_name: Name of the pretrained model
        num_labels: Number of classification labels
        label2id: Mapping from label names to IDs
        id2label: Mapping from IDs to label names
        dropout: Optional dropout probability for classifier head
        
    Returns:
        Initialized model
    """
    config = AutoConfig.from_pretrained(
        model_name,
        num_labels=num_labels,
        label2id=label2id,
        id2label=id2label
    )
    
    # Set dropout if provided
    if dropout is not None:
        if hasattr(config, 'hidden_dropout_prob'):
            config.hidden_dropout_prob = dropout
        if hasattr(config, 'attention_probs_dropout_prob'):
            config.attention_probs_dropout_prob = dropout
        if hasattr(config, 'classifier_dropout'):
            config.classifier_dropout = dropout
        logging.info(f"Set model dropout to {dropout}")
    
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config
    )
    
    return model
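
# Example usage (illustrative only; the model name and label set below are
# assumptions, not values taken from this project's configuration):
#
#     label2id = {"negative": 0, "positive": 1}
#     id2label = {i: name for name, i in label2id.items()}
#     model = create_model(
#         "distilbert-base-uncased",
#         num_labels=2,
#         label2id=label2id,
#         id2label=id2label,
#         dropout=0.2,  # overrides hidden/attention/classifier dropout where the config has them
#     )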


def apply_class_weights(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> Optional[torch.Tensor]:
    """
    Build a class-weight tensor for weighting the model's loss function.

    Args:
        model: The model the weights are intended for (not modified by this function)
        class_weights: List of weights for each class (must match num_labels)

    Returns:
        Class-weight tensor if class_weights is provided, otherwise None.
        Applying the weights requires a custom Trainer with a weighted loss
        (see the sketch below).
    """
    if class_weights is not None:
        weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
        # Note: applying these weights requires a custom Trainer with a weighted loss
        logging.info(f"Class weights prepared: {class_weights}")
        return weights_tensor
    return None
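

# A minimal sketch of the custom Trainer mentioned above, using the standard
# Hugging Face `Trainer.compute_loss` override pattern. The class name
# `WeightedLossTrainer` and its `class_weights` keyword are illustrative
# assumptions, not part of this project's existing API.
from transformers import Trainer  # imported here to keep the sketch self-contained


class WeightedLossTrainer(Trainer):
    """Trainer that applies per-class weights in the cross-entropy loss."""

    def __init__(self, *args, class_weights: Optional[torch.Tensor] = None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        # Move the weights to wherever the logits live before building the loss
        weight = (
            self.class_weights.to(logits.device)
            if self.class_weights is not None
            else None
        )
        loss_fct = nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(
            logits.view(-1, self.model.config.num_labels), labels.view(-1)
        )
        return (loss, outputs) if return_outputs else loss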


def get_model_size(model: nn.Module) -> float:
    """
    Calculate model size in millions of parameters.
    
    Args:
        model: PyTorch model
        
    Returns:
        Number of parameters in millions
    """
    param_size = sum(p.numel() for p in model.parameters())
    return param_size / 1e6


def get_trainable_params(model: nn.Module) -> Dict[str, int]:
    """
    Get count of trainable and non-trainable parameters.
    
    Args:
        model: PyTorch model
        
    Returns:
        Dictionary with 'trainable', 'non_trainable', and 'total' parameter counts
    """
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return {
        'trainable': trainable,
        'total': total,
        'non_trainable': total - trainable
    }
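

if __name__ == "__main__":
    # Quick self-check of the parameter-counting utilities. This is an
    # illustrative sketch only; the tiny Sequential below stands in for the
    # model that create_model would normally return, to keep the check offline.
    logging.basicConfig(level=logging.INFO)

    demo_model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
    # Freeze the first layer so the trainable and total counts differ
    for param in demo_model[0].parameters():
        param.requires_grad = False

    logging.info(f"Model size: {get_model_size(demo_model):.3f}M parameters")
    logging.info(f"Parameter counts: {get_trainable_params(demo_model)}")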