import os
import sys

import torch.nn as nn
import torch.nn.functional as F

sys.path.append(os.getcwd())

from main.library.predictors.FCPE.attentions import SelfAttention
from main.library.predictors.FCPE.utils import calc_same_padding, Transpose, GLU, Swish
class ConformerConvModule_LEGACY(nn.Module):
    """Legacy Conformer convolution block: LayerNorm -> pointwise conv -> GLU ->
    depthwise conv (manual padding) -> Swish -> pointwise conv -> dropout."""

    def __init__(
        self,
        dim,
        causal=False,
        expansion_factor=2,
        kernel_size=31,
        dropout=0.0
    ):
        super().__init__()
        inner_dim = dim * expansion_factor
        self.net = nn.Sequential(
            nn.LayerNorm(dim),
            Transpose((1, 2)),  # (batch, time, dim) -> (batch, dim, time) for Conv1d
            nn.Conv1d(dim, inner_dim * 2, 1),
            GLU(dim=1),  # gated linear unit halves the channels back to inner_dim
            DepthWiseConv1d_LEGACY(
                inner_dim,
                inner_dim,
                kernel_size=kernel_size,
                padding=(
                    # causal: pad only on the left so no future frames are seen
                    calc_same_padding(kernel_size) if not causal else (kernel_size - 1, 0)
                )
            ),
            Swish(),
            nn.Conv1d(inner_dim, dim, 1),
            Transpose((1, 2)),  # back to (batch, time, dim)
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)
class ConformerConvModule(nn.Module):
    """Current Conformer convolution block; uses the built-in nn.GLU/nn.SiLU and
    lets nn.Conv1d handle the symmetric "same" padding."""

    def __init__(
        self,
        dim,
        expansion_factor=2,
        kernel_size=31,
        dropout=0.0
    ):
        super().__init__()
        inner_dim = dim * expansion_factor
        self.net = nn.Sequential(
            nn.LayerNorm(dim),
            Transpose((1, 2)),
            nn.Conv1d(dim, inner_dim * 2, 1),
            nn.GLU(dim=1),
            DepthWiseConv1d(
                inner_dim,
                inner_dim,
                kernel_size=kernel_size,
                padding=calc_same_padding(kernel_size)[0],
                groups=inner_dim  # one filter per channel -> depthwise convolution
            ),
            nn.SiLU(),
            nn.Conv1d(inner_dim, dim, 1),
            Transpose((1, 2)),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)
class DepthWiseConv1d_LEGACY(nn.Module):
    """Legacy depthwise convolution: padding is applied manually with F.pad so
    that asymmetric (causal) padding tuples are supported."""

    def __init__(
        self,
        chan_in,
        chan_out,
        kernel_size,
        padding
    ):
        super().__init__()
        self.padding = padding
        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size, groups=chan_in)

    def forward(self, x):
        return self.conv(F.pad(x, self.padding))
class DepthWiseConv1d(nn.Module):
    """Depthwise convolution that relies on nn.Conv1d's own symmetric padding."""

    def __init__(
        self,
        chan_in,
        chan_out,
        kernel_size,
        padding,
        groups
    ):
        super().__init__()
        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size=kernel_size, padding=padding, groups=groups)

    def forward(self, x):
        return self.conv(x)
class EncoderLayer(nn.Module):
    """Legacy encoder layer: pre-norm self-attention followed by the legacy
    Conformer convolution module, each wrapped in a residual connection."""

    def __init__(
        self,
        parent
    ):
        super().__init__()
        self.conformer = ConformerConvModule_LEGACY(parent.dim_model)
        self.norm = nn.LayerNorm(parent.dim_model)
        self.dropout = nn.Dropout(parent.residual_dropout)
        self.attn = SelfAttention(dim=parent.dim_model, heads=parent.num_heads, causal=False)

    def forward(self, phone, mask=None):
        phone = phone + self.attn(self.norm(phone), mask=mask)  # residual attention
        return phone + self.conformer(phone)  # residual convolution
class ConformerNaiveEncoder(nn.Module):
    """Stack of CFNEncoderLayer blocks used as the FCPE encoder backbone."""

    def __init__(
        self,
        num_layers,
        num_heads,
        dim_model,
        use_norm=False,
        conv_only=False,
        conv_dropout=0,
        atten_dropout=0
    ):
        super().__init__()
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.dim_model = dim_model
        self.use_norm = use_norm
        self.residual_dropout = 0.1
        self.attention_dropout = 0.1
        self.encoder_layers = nn.ModuleList([
            CFNEncoderLayer(dim_model, num_heads, use_norm, conv_only, conv_dropout, atten_dropout)
            for _ in range(num_layers)
        ])

    def forward(self, x, mask=None):
        for layer in self.encoder_layers:
            x = layer(x, mask)
        return x
class CFNEncoderLayer(nn.Module):
    """Single encoder layer: optional pre-norm self-attention plus a Conformer
    convolution module, each added back to the input as a residual."""

    def __init__(
        self,
        dim_model,
        num_heads=8,
        use_norm=False,
        conv_only=False,
        conv_dropout=0,
        atten_dropout=0
    ):
        super().__init__()
        self.conformer = (
            nn.Sequential(
                ConformerConvModule(dim_model),
                nn.Dropout(conv_dropout)
            )
            if conv_dropout > 0
            else ConformerConvModule(dim_model)
        )
        self.norm = nn.LayerNorm(dim_model)
        self.dropout = nn.Dropout(0.1)
        self.attn = SelfAttention(
            dim=dim_model,
            heads=num_heads,
            causal=False,
            use_norm=use_norm,
            dropout=atten_dropout
        ) if not conv_only else None

    def forward(self, x, mask=None):
        if self.attn is not None:
            x = x + self.attn(self.norm(x), mask=mask)  # residual attention (skipped when conv_only)
        return x + self.conformer(x)  # residual convolution
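

# A minimal usage sketch, not part of the original module: it only illustrates the
# expected call pattern and assumes the SelfAttention import above resolves in this
# environment. The encoder should preserve the (batch, frames, dim_model) shape of
# its input, since every sub-module here is shape-preserving along the time axis.
if __name__ == "__main__":
    import torch

    encoder = ConformerNaiveEncoder(num_layers=2, num_heads=4, dim_model=64)
    dummy = torch.randn(1, 100, 64)  # (batch, frames, dim_model)
    with torch.no_grad():
        out = encoder(dummy)
    print(out.shape)  # expected: torch.Size([1, 100, 64])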