"""AdaCos loss functions (with and without a fixed cosine margin) for Keras."""

import keras
import numpy as np
import tensorflow as tf


def _validate_num_classes(value):
    """Return ``value`` unchanged after checking that it is an int >= 2.

    Raises:
        TypeError: if ``value`` is not an ``int``.
        ValueError: if ``value`` is smaller than 2.
    """
    if not isinstance(value, int):
        raise TypeError(f"`num_classes` must be an int, got {type(value).__name__}")
    if value < 2:
        raise ValueError(f"`num_classes` must be >= 2, got {value}")
    return value


def _initial_scale(num_classes):
    """Create the non-trainable scale variable, set to sqrt(2) * log(num_classes - 1).

    NOTE: for ``num_classes == 2`` this evaluates to 0. The adaptive update
    then also evaluates to 0, which is rejected as non-positive, so the scale
    stays at 0 for the lifetime of the loss object.
    """
    return tf.Variable(
        np.sqrt(2) * np.log(num_classes - 1), dtype=tf.float32, trainable=False
    )


def _prepare_inputs(y_true, y_pred, num_classes, dtype):
    """Normalize raw loss inputs.

    Returns:
        A tuple ``(labels, cosines, target_mask)`` where ``labels`` is a 1-D
        int32 tensor, ``cosines`` is ``y_pred`` cast to ``dtype`` and clipped
        to the open interval (-1, 1), and ``target_mask`` is the one-hot
        encoding of ``labels`` in ``dtype``.
    """
    # Flatten so that labels shaped (batch_size, 1) behave like (batch_size,).
    labels = tf.reshape(tf.cast(y_true, tf.int32), [-1])
    eps = tf.keras.backend.epsilon()
    # Cast first so every later product with the scale variable has one dtype;
    # clip so that acos() stays finite.
    cosines = tf.clip_by_value(tf.cast(y_pred, dtype), -1.0 + eps, 1.0 - eps)
    target_mask = tf.one_hot(labels, depth=num_classes, dtype=dtype)
    return labels, cosines, target_mask


def _update_scale(scale, cosines, target_mask):
    """Recompute the adaptive scale from the current batch and assign it to ``scale``.

    The candidate value is ``log(B_avg) / cos(min(pi / 4, theta_med))`` where
    ``B_avg`` is the batch mean of the per-sample sum of ``exp(scale * cos)``
    over the non-target classes and ``theta_med`` is the median angle of the
    target-class cosines. The candidate is stored only if it is finite and
    positive; otherwise the current value is kept.
    """
    is_target = target_mask > 0

    # Angle between each sample and its own class: one value per row.
    theta_true = tf.math.acos(tf.reduce_sum(cosines * target_mask, axis=-1))
    theta_med = tf.keras.ops.median(theta_true)

    # Sum exp(scale * cos) over non-target classes. tf.where (rather than a
    # multiplication by 0) drops the target entry even if its exp overflowed.
    exp_logits = tf.math.exp(scale * cosines)
    negatives = tf.where(is_target, tf.zeros_like(exp_logits), exp_logits)
    b_avg = tf.reduce_mean(tf.reduce_sum(negatives, axis=-1))

    new_scale = tf.math.log(b_avg) / tf.math.cos(
        tf.minimum(tf.constant(np.pi / 4), theta_med)
    )

    # Keep the current scale if the candidate is invalid.
    safe_scale = tf.cond(
        tf.math.is_finite(new_scale) & (new_scale > 0),
        lambda: new_scale,
        lambda: tf.convert_to_tensor(scale),
    )
    scale.assign(safe_scale)


@keras.saving.register_keras_serializable()
class AdaCosLoss(tf.keras.losses.Loss):
    """
    Adaptive Cosine Loss (AdaCos).

    Implements the AdaCos loss function as described in:
    "AdaCos: Adaptively Scaling Cosine Logits for Effectively Learning Deep
    Face Representations" (Zhang et al., 2019).

    The loss keeps a non-trainable ``scale`` variable that is overwritten on
    every ``call``. ``get_config()`` does not include the current scale value.

    Args:
        num_classes (int): Number of classes in the classification problem.
            Must be an int >= 2; the ``None`` default is rejected with a
            ``TypeError``.
        name (str, optional): Name for the loss instance.
        **kwargs: Forwarded to the base ``Loss`` constructor.
    """

    def __init__(self, num_classes=None, name="AdaCos", **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_classes = num_classes  # validated by the property setter
        self.scale = _initial_scale(self.num_classes)

    def call(self, y_true, y_pred):
        """
        Update the adaptive scale from the batch, then compute the loss.

        Args:
            y_true: integer labels in [0, num_classes-1]; flattened to
                (batch_size,), so (batch_size, 1) is accepted as well.
            y_pred: (batch_size, num_classes) classification cosine similarities.

        Returns:
            Tensor scalar: Mean AdaCos loss over the batch.
        """
        labels, cosines, target_mask = _prepare_inputs(
            y_true, y_pred, self.num_classes, self.scale.dtype
        )
        _update_scale(self.scale, cosines, target_mask)

        # Read the variable after the assignment so the fresh scale is used.
        logits = self.scale * cosines
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
        return tf.reduce_mean(loss)

    def get_config(self):
        """Return the base loss config extended with ``num_classes``."""
        base_config = super().get_config()
        return {**base_config, 'num_classes': self.num_classes}

    def __repr__(self):
        return (f"{self.__class__.__name__}(num_classes={self.num_classes}, "
                f"name='{self.name}')")

    def __str__(self):
        return self.__repr__()

    @property
    def num_classes(self):
        """Number of classes (int >= 2)."""
        return self._num_classes

    @num_classes.setter
    def num_classes(self, value):
        self._num_classes = _validate_num_classes(value)


@keras.saving.register_keras_serializable()
class AdaCosLossMargin(tf.keras.losses.Loss):
    """
    Adaptive Cosine Loss with Margin (AdaCosMargin).

    Extends AdaCos by introducing a fixed margin penalty for the target class
    logits, encouraging greater separation between classes in angular (cosine)
    space. The margin is applied only when building the final logits; the
    adaptive scale update uses the unmodified cosines.

    Reference:
        - AdaCos: Adaptively Scaling Cosine Logits for Effectively Learning
          Deep Face Representations (Zhang et al., 2019)
        - Large Margin Cosine Loss (CosFace): https://arxiv.org/abs/1801.09414

    The loss keeps a non-trainable ``scale`` variable that is overwritten on
    every ``call``. ``get_config()`` does not include the current scale value.

    Args:
        margin (float): Margin to subtract from the target class cosine
            similarity (0.0–1.0).
        num_classes (int): Number of classes. Must be an int >= 2; the
            ``None`` default is rejected with a ``TypeError``.
        name (str, optional): Name for the loss.
        **kwargs: Forwarded to the base ``Loss`` constructor.
    """

    def __init__(self, margin=0.1, num_classes=None, name="AdaCosLossMargin", **kwargs):
        super().__init__(name=name, **kwargs)
        self.margin = margin  # validated by the property setter
        self.num_classes = num_classes  # validated by the property setter
        self.scale = _initial_scale(self.num_classes)

    def call(self, y_true, y_pred):
        """
        Update the adaptive scale from the batch, then compute the margin loss.

        Args:
            y_true: integer labels in [0, num_classes-1]; flattened to
                (batch_size,), so (batch_size, 1) is accepted as well.
            y_pred: (batch_size, num_classes) cosine similarities.

        Returns:
            Tensor scalar: Mean AdaCosMargin loss over the batch.
        """
        labels, cosines, target_mask = _prepare_inputs(
            y_true, y_pred, self.num_classes, self.scale.dtype
        )
        _update_scale(self.scale, cosines, target_mask)

        # Subtract the margin from the target-class cosine only, then scale.
        logits = self.scale * (cosines - self.margin * target_mask)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
        return tf.reduce_mean(loss)

    def get_config(self):
        """Return the base loss config extended with ``num_classes`` and ``margin``."""
        base_config = super().get_config()
        return {
            **base_config,
            'num_classes': self.num_classes,
            'margin': self.margin
        }

    def __repr__(self):
        return (f"{self.__class__.__name__}(margin={self.margin}, num_classes={self.num_classes}, "
                f"name='{self.name}')")

    def __str__(self):
        return self.__repr__()

    @property
    def num_classes(self):
        """Number of classes (int >= 2)."""
        return self._num_classes

    @num_classes.setter
    def num_classes(self, value):
        self._num_classes = _validate_num_classes(value)

    @property
    def margin(self):
        """Margin subtracted from the target-class cosine, stored as a float in [0.0, 1.0]."""
        return self._margin

    @margin.setter
    def margin(self, value):
        if not isinstance(value, (float, int)):
            raise TypeError(f"`margin` must be a float or int, got {type(value).__name__}")
        value = float(value)
        if not (0.0 <= value <= 1.0):
            raise ValueError(f"`margin` must be between 0.0 and 1.0, got {value}")
        self._margin = value