tianfengping.tfp
init
149fbcd
raw
history blame
1.38 kB
import torch
import torch.nn.functional as F
def tpr_loss(disc_real_outputs, disc_generated_outputs, tau):
    """Sum a capped, median-centred relativistic penalty over output pairs.

    For every (real, generated) pair the score gap ``dr - dg`` is centred on
    its median. The squared deviation from the median is averaged over the
    elements whose gap lies strictly below the median, and that average is
    capped at ``tau`` before being added to the running total.

    Args:
        disc_real_outputs: Iterable of tensors holding discriminator outputs
            for real samples.
        disc_generated_outputs: Iterable of tensors holding discriminator
            outputs for generated samples, paired positionally with
            ``disc_real_outputs`` (``zip`` stops at the shorter iterable).
        tau: Upper bound applied to each pair's contribution.

    Returns:
        The accumulated loss: a scalar tensor when at least one pair is
        supplied, otherwise the integer ``0``.
    """
    loss = 0
    for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
        gap = dr - dg
        m_DG = torch.median(gap)
        below_median = dr < dg + m_DG
        selected = ((gap - m_DG) ** 2)[below_median]
        if selected.numel() > 0:
            L_rel = selected.mean()
        else:
            # Nothing lies strictly below the median (single-element or
            # constant gap). The mean of an empty selection is NaN and would
            # poison the whole loss, so this pair uses a zero mean instead.
            L_rel = gap.new_zeros(())
        # tau - relu(tau - x) == min(tau, x): truncate the penalty at tau.
        loss += tau - F.relu(tau - L_rel)
    return loss
def mel_loss(real_speech, generated_speech, mel_transforms):
    """Accumulate the L1 distance between real and generated features.

    Each callable in ``mel_transforms`` is applied to both signals (real
    first, then generated) and the mean absolute difference between the two
    results is added to the total.

    Args:
        real_speech: Reference signal passed to every transform.
        generated_speech: Synthesised signal passed to every transform.
        mel_transforms: Iterable of callables mapping a signal to the
            representation that is compared.

    Returns:
        The summed L1 losses, or the integer ``0`` when ``mel_transforms``
        is empty.
    """
    total = 0
    for to_mel in mel_transforms:
        target, estimate = to_mel(real_speech), to_mel(generated_speech)
        total += F.l1_loss(estimate, target)
    return total
def OrthogonalityLoss(speaker_embedding, emotion_embedding, *, weight=0.01, eps=1e-8):
    """Penalise cosine similarity between emotion and speaker embeddings.

    With ``C[i, j]`` the cosine similarity between emotion row ``i`` and
    speaker row ``j``, the loss is::

        weight * (sum_ij C[i, j] ** 2 + sum_i (mean_j C[i, j]) ** 2)

    so it is zero exactly when every emotion row is orthogonal to every
    speaker row.

    Args:
        speaker_embedding: 2-D tensor of shape ``(num_speakers, dim)``.
        emotion_embedding: 2-D tensor of shape ``(num_emotions, dim)``.
        weight: Scale applied to the summed penalty (default ``0.01``).
        eps: Lower bound on the row norms used for normalisation, so that
            all-zero rows contribute ``0`` instead of NaN.

    Returns:
        Scalar tensor holding the weighted orthogonality penalty.
    """
    # Unit-normalise each row once; F.normalize clamps the norm at eps, which
    # avoids the 0/0 NaN that an explicit division by the norms produces.
    emotion_unit = F.normalize(emotion_embedding, p=2, dim=1, eps=eps)
    speaker_unit = F.normalize(speaker_embedding, p=2, dim=1, eps=eps)

    # Pairwise cosine similarities, shape (num_emotions, num_speakers).
    cosine_matrix = torch.matmul(emotion_unit, speaker_unit.t())

    # Squared Frobenius norm, written as a sum of squares.
    ort_loss = cosine_matrix.pow(2).sum()

    # Mean similarity of each emotion row against all speaker rows. This must
    # be taken over the pairwise matrix: broadcasting (B, D, 1) against
    # (B, 1, D) would compare each scalar component with a whole vector and
    # give a non-zero penalty even for perfectly orthogonal embeddings.
    cosine_ort_loss = cosine_matrix.mean(dim=-1).pow(2).sum()

    return weight * (ort_loss + cosine_ort_loss)