Spaces:
Running
Running
| import torch | |
| import torch.nn.functional as F | |
def tpr_loss(disc_real_outputs, disc_generated_outputs, tau):
    """Sum a median-relative, tau-capped penalty over discriminator output pairs.

    For each (real, generated) pair the element-wise difference ``dr - dg`` is
    compared with its median. The squared deviation from the median is
    averaged over the elements selected by ``dr < dg + median``, and that
    average is capped at ``tau`` (``tau - relu(tau - x)`` equals
    ``min(x, tau)``).

    Args:
        disc_real_outputs: Iterable of tensors, discriminator outputs for real
            input.
        disc_generated_outputs: Iterable of tensors paired position-wise with
            ``disc_real_outputs``; each pair must be broadcast-compatible.
        tau: Upper bound applied to every per-pair term.

    Returns:
        The sum of the capped per-pair terms. This is the int ``0`` when the
        inputs yield no pairs, otherwise a scalar tensor.
    """
    loss = 0
    for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
        diff = dr - dg
        m_DG = torch.median(diff)
        selected = ((diff - m_DG) ** 2)[dr < dg + m_DG]
        # torch.median returns the lower of the two middle values, so the mask
        # can select nothing (single element, or all differences equal).
        # mean() of an empty tensor is NaN and would poison the whole loss;
        # sum() of an empty tensor is 0 and keeps the term a tensor.
        if selected.numel() > 0:
            L_rel = selected.mean()
        else:
            L_rel = selected.sum()
        loss = loss + (tau - F.relu(tau - L_rel))
    return loss
def mel_loss(real_speech, generated_speech, mel_transforms):
    """Sum the L1 distance between real and generated speech over transforms.

    Each callable in ``mel_transforms`` is applied to ``real_speech`` and then
    to ``generated_speech``; the mean absolute error between the two results
    is added to the running total.

    Args:
        real_speech: Reference signal passed to every transform.
        generated_speech: Synthesised signal passed to every transform.
        mel_transforms: Iterable of callables mapping a signal to a tensor.

    Returns:
        The summed L1 losses, or the int ``0`` when ``mel_transforms`` is
        empty.
    """

    def _l1_for(to_mel):
        # Transform the reference first, then the generated signal.
        target = to_mel(real_speech)
        estimate = to_mel(generated_speech)
        return F.l1_loss(estimate, target)

    return sum(map(_l1_for, mel_transforms))
def OrthogonalityLoss(speaker_embedding, emotion_embedding, eps=1e-8):
    """Penalise alignment between speaker and emotion embeddings.

    Two terms are added and scaled by 0.01:

    1. The squared Frobenius norm of the matrix of cosine similarities between
       every emotion row and every speaker row.
    2. The squared norm of the per-row mean of
       ``F.cosine_similarity(emotion.unsqueeze(2), speaker.unsqueeze(1), dim=-1)``.

    Args:
        speaker_embedding: 2-D tensor (``.t()`` and ``dim=1`` norms are used);
            presumably ``(batch, dim)`` -- confirm against the caller.
        emotion_embedding: 2-D tensor that must broadcast against
            ``speaker_embedding`` in the second term, so in practice the same
            shape.
        eps: Lower bound on the product of norms in the first term. It stops a
            zero-norm embedding row from producing 0/0 = NaN. The default
            matches the default ``eps`` of ``F.cosine_similarity``.

    Returns:
        Scalar tensor ``0.01 * (term_1 + term_2)``.
    """
    # Term 1: all-pairs cosine similarity, emotion rows against speaker rows.
    dot_product_matrix = torch.matmul(emotion_embedding, speaker_embedding.t())
    emotion_norms = torch.norm(emotion_embedding, dim=1, keepdim=True)
    speaker_norms = torch.norm(speaker_embedding, dim=1, keepdim=True).t()
    # Clamp so an all-zero embedding row gives similarity 0 instead of NaN.
    norm_products = torch.clamp(emotion_norms * speaker_norms, min=eps)
    normalized_dot_product_matrix = dot_product_matrix / norm_products
    ort_loss = torch.norm(normalized_dot_product_matrix, p='fro') ** 2

    # Term 2, kept exactly as originally written.
    # NOTE(review): with (B, D) inputs this broadcasts to (B, D, D) and compares
    # each emotion *scalar* (repeated D times) with the same-row speaker vector,
    # rather than comparing embedding vectors -- confirm this is intended.
    cosine_sim = F.cosine_similarity(
        emotion_embedding.unsqueeze(2),
        speaker_embedding.unsqueeze(1),
        dim=-1,
    )
    cosine_ort_loss = torch.norm(cosine_sim.mean(dim=-1), p='fro') ** 2

    return 0.01 * (ort_loss + cosine_ort_loss)