"""
Model definition and utilities
"""
import logging
from typing import Dict, Optional

import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModelForSequenceClassification

def create_model(
    model_name: str,
    num_labels: int,
    label2id: Dict[str, int],
    id2label: Dict[int, str],
    dropout: Optional[float] = None
):
    """
    Create a sequence classification model with optional dropout configuration.

    Args:
        model_name: Name of the pretrained model
        num_labels: Number of classification labels
        label2id: Mapping from label names to IDs
        id2label: Mapping from IDs to label names
        dropout: Optional dropout probability for the classifier head

    Returns:
        Initialized model
    """
    config = AutoConfig.from_pretrained(
        model_name,
        num_labels=num_labels,
        label2id=label2id,
        id2label=id2label
    )

    # Set dropout if provided
    if dropout is not None:
        if hasattr(config, 'hidden_dropout_prob'):
            config.hidden_dropout_prob = dropout
        if hasattr(config, 'attention_probs_dropout_prob'):
            config.attention_probs_dropout_prob = dropout
        if hasattr(config, 'classifier_dropout'):
            config.classifier_dropout = dropout
        logging.info(f"Set model dropout to {dropout}")

    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config
    )
    return model

def apply_class_weights(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> Optional[torch.Tensor]:
    """
    Prepare a class-weight tensor for weighting the model's loss function.

    Args:
        model: The model the weights are intended for
        class_weights: List of weights for each class (must match num_labels)

    Returns:
        Tensor of class weights if class_weights is provided, otherwise None.
        The model itself is not modified; the tensor must be passed to a
        custom Trainer that computes a weighted loss.
    """
    if class_weights is not None:
        weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
        # Note: using these weights requires a custom Trainer with weighted loss
        logging.info(f"Class weights prepared: {class_weights}")
        return weights_tensor
    return None
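
# --- Illustrative sketch (not part of the original module's API) ---
# apply_class_weights only returns a weights tensor; to actually use it, the
# weights have to be wired into the loss via a custom Trainer. The class below
# is one hedged way to do that; the compute_loss signature follows recent
# transformers releases and may need adjusting for other versions.
from transformers import Trainer  # assumed available alongside the imports above


class WeightedLossTrainer(Trainer):
    """Trainer subclass that applies class weights in the cross-entropy loss."""

    def __init__(self, *args, class_weights: Optional[torch.Tensor] = None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        # Move the weights to the same device as the logits before building the loss
        weight = (
            self.class_weights.to(logits.device)
            if self.class_weights is not None
            else None
        )
        loss_fct = nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        return (loss, outputs) if return_outputs else loss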

def get_model_size(model: nn.Module) -> float:
    """
    Calculate model size in millions of parameters.

    Args:
        model: PyTorch model

    Returns:
        Number of parameters in millions
    """
    param_size = sum(p.numel() for p in model.parameters())
    return param_size / 1e6

def get_trainable_params(model: nn.Module) -> Dict[str, int]:
    """
    Get counts of trainable and non-trainable parameters.

    Args:
        model: PyTorch model

    Returns:
        Dictionary with 'trainable', 'total' and 'non_trainable' parameter counts
    """
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return {
        'trainable': trainable,
        'total': total,
        'non_trainable': total - trainable
    }
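
# --- Illustrative usage (assumption): a quick smoke test of the helpers above.
# "distilbert-base-uncased" and the example labels are placeholder choices,
# not values mandated by this module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    labels = ["negative", "positive"]
    label2id = {name: i for i, name in enumerate(labels)}
    id2label = {i: name for i, name in enumerate(labels)}

    model = create_model(
        "distilbert-base-uncased",
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label,
        dropout=0.2,
    )
    class_weights = apply_class_weights(model, class_weights=[1.0, 2.0])

    print(f"Model size: {get_model_size(model):.1f}M parameters")
    print(f"Parameter counts: {get_trainable_params(model)}")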