#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2019 Tomoki Hayashi
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)

"""Layer modules for FFT block in FastSpeech (Feed-forward Transformer)."""

import torch


class MultiLayeredConv1d(torch.nn.Module):
    """Multi-layered conv1d for Transformer block.

    This is a module of multi-layered conv1d designed
    to replace the position-wise feed-forward network
    in the Transformer block, which is introduced in
    `FastSpeech: Fast, Robust and Controllable Text to Speech`_.

    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    """

    def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate):
        """Initialize MultiLayeredConv1d module.

        Args:
            in_chans (int): Number of input channels.
            hidden_chans (int): Number of hidden channels.
            kernel_size (int): Kernel size of conv1d.
            dropout_rate (float): Dropout rate.

        """
        super(MultiLayeredConv1d, self).__init__()
        self.w_1 = torch.nn.Conv1d(
            in_chans,
            hidden_chans,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        self.w_2 = torch.nn.Conv1d(
            hidden_chans,
            in_chans,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x):
        """Calculate forward propagation.

        Args:
            x (torch.Tensor): Batch of input tensors (B, T, in_chans).

        Returns:
            torch.Tensor: Batch of output tensors (B, T, in_chans).

        """
        # Conv1d expects (B, C, T), so transpose around each convolution.
        x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1)
        return self.w_2(self.dropout(x).transpose(-1, 1)).transpose(-1, 1)
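

# Illustrative shape check, not part of the original module; the channel
# sizes and lengths below are arbitrary assumptions. With an odd kernel,
# padding=(kernel_size - 1) // 2 preserves T, and w_2 projects hidden_chans
# back to in_chans, so the output shape equals the input shape.
if __name__ == "__main__":
    _ffn = MultiLayeredConv1d(in_chans=4, hidden_chans=16, kernel_size=3, dropout_rate=0.1)
    _y = _ffn(torch.randn(2, 10, 4))
    assert _y.shape == (2, 10, 4)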


class FsmnFeedForward(torch.nn.Module):
    """Position-wise feed-forward for FSMN blocks.

    This is a module of multi-layered conv1d designed
    to replace the position-wise feed-forward network
    in the FSMN block.

    """

    def __init__(self, in_chans, hidden_chans, out_chans, kernel_size, dropout_rate):
        """Initialize FsmnFeedForward module.

        Args:
            in_chans (int): Number of input channels.
            hidden_chans (int): Number of hidden channels.
            out_chans (int): Number of output channels.
            kernel_size (int): Kernel size of conv1d.
            dropout_rate (float): Dropout rate.

        """
        super(FsmnFeedForward, self).__init__()
        self.w_1 = torch.nn.Conv1d(
            in_chans,
            hidden_chans,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        self.w_2 = torch.nn.Conv1d(
            hidden_chans,
            out_chans,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            bias=False,
        )
        self.norm = torch.nn.LayerNorm(hidden_chans)
        self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x, ilens=None):
        """Calculate forward propagation.

        Args:
            x (torch.Tensor): Batch of input tensors (B, T, in_chans).
            ilens (torch.Tensor, optional): Batch of input lengths (B,),
                passed through unchanged.

        Returns:
            torch.Tensor: Batch of output tensors (B, T, out_chans).
            torch.Tensor: The unchanged `ilens`.

        """
        # Conv1d expects (B, C, T); LayerNorm is applied over the channel
        # dimension while the tensor is back in (B, T, C) layout.
        x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1)
        return (
            self.w_2(self.norm(self.dropout(x)).transpose(-1, 1)).transpose(-1, 1),
            ilens,
        )
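

# Illustrative shape check, not part of the original module; channel sizes
# and lengths are arbitrary assumptions. Unlike MultiLayeredConv1d, the
# output channel count (out_chans) can differ from in_chans, and ilens is
# returned untouched alongside the output tensor.
if __name__ == "__main__":
    _fsmn_ffn = FsmnFeedForward(
        in_chans=4, hidden_chans=16, out_chans=8, kernel_size=3, dropout_rate=0.1
    )
    _y, _ilens = _fsmn_ffn(torch.randn(2, 10, 4), ilens=torch.tensor([10, 7]))
    assert _y.shape == (2, 10, 8)
    assert torch.equal(_ilens, torch.tensor([10, 7]))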


class Conv1dLinear(torch.nn.Module):
    """Conv1D + Linear for Transformer block.

    A variant of MultiLayeredConv1d, which replaces the second
    conv1d layer with a linear layer.

    """

    def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate):
        """Initialize Conv1dLinear module.

        Args:
            in_chans (int): Number of input channels.
            hidden_chans (int): Number of hidden channels.
            kernel_size (int): Kernel size of conv1d.
            dropout_rate (float): Dropout rate.

        """
        super(Conv1dLinear, self).__init__()
        self.w_1 = torch.nn.Conv1d(
            in_chans,
            hidden_chans,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        self.w_2 = torch.nn.Linear(hidden_chans, in_chans)
        self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x):
        """Calculate forward propagation.

        Args:
            x (torch.Tensor): Batch of input tensors (B, T, in_chans).

        Returns:
            torch.Tensor: Batch of output tensors (B, T, in_chans).

        """
        # Only the first layer is a convolution; the linear second layer
        # operates directly on the (B, T, C) layout, so no second
        # transpose pair is needed.
        x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1)
        return self.w_2(self.dropout(x))
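

# Illustrative shape check, not part of the original module; sizes are
# arbitrary assumptions. The linear second layer projects hidden_chans back
# to in_chans, so the output shape again matches the input shape.
if __name__ == "__main__":
    _conv_lin = Conv1dLinear(in_chans=4, hidden_chans=16, kernel_size=3, dropout_rate=0.1)
    _y = _conv_lin(torch.randn(2, 10, 4))
    assert _y.shape == (2, 10, 4)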