|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import copy |
|
|
|
|
|
from transformers.configuration_utils import PretrainedConfig |
|
|
from transformers.utils import logging |
|
|
|
|
|
# Module-level logger obtained from the transformers logging helper, keyed by
# this module's name. It is not referenced in the visible part of this file.
logger = logging.get_logger(__name__) |
|
|
|
|
|
class HiFiGanConfig(PretrainedConfig):
    """Configuration holder for a HiFi-GAN style model.

    Every constructor argument is stored on the instance under the same name.
    Any extra keyword arguments are forwarded to `PretrainedConfig.__init__`.

    NOTE(review): the per-argument descriptions below only record the defaults
    visible in the signature; the meaning of each value is implied by its name
    and should be confirmed against the model code that consumes this config.

    Args:
        in_channels: defaults to 80.
        base_channels: defaults to 512.
        nb_harmonics: defaults to 8.
        sampling_rate: defaults to 24000.
        nsf_alpha: defaults to 0.1.
        nsf_sigma: defaults to 0.003.
        nsf_voiced_threshold: defaults to 10.
        upsample_rates: defaults to ``[8, 5, 3]``.
        upsample_kernel_sizes: defaults to ``[16, 11, 7]``.
        istft_params: defaults to ``{'n_fft': 16, 'hop_len': 4}``.
        resblock_kernel_sizes: defaults to ``[3, 7, 11]``.
        resblock_dilation_sizes: defaults to three copies of ``[1, 3, 5]``.
        source_resblock_kernel_sizes: defaults to ``[7, 7, 11]``.
        source_resblock_dilation_sizes: defaults to three copies of ``[1, 3, 5]``.
        lrelu_slope: defaults to 0.1.
        audio_limit: defaults to 0.99.
        f0_predictor_config: defaults to
            ``{'num_class': 1, 'in_channels': 80, 'cond_channels': 512}``.
        **kwargs: forwarded unchanged to the parent constructor.
    """

    def __init__(
        self,
        in_channels=80,
        base_channels=512,
        nb_harmonics=8,
        sampling_rate=24000,
        nsf_alpha=0.1,
        nsf_sigma=0.003,
        nsf_voiced_threshold=10,
        upsample_rates=[8, 5, 3],
        upsample_kernel_sizes=[16, 11, 7],
        istft_params={'n_fft': 16, 'hop_len': 4},
        resblock_kernel_sizes=[3, 7, 11],
        resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
        source_resblock_kernel_sizes=[7, 7, 11],
        source_resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
        lrelu_slope=0.1,
        audio_limit=0.99,
        f0_predictor_config={
            'num_class': 1,
            'in_channels': 80,
            'cond_channels': 512,
        },
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Plain scalar settings are stored as given.
        self.in_channels = in_channels
        self.base_channels = base_channels
        self.nb_harmonics = nb_harmonics
        self.sampling_rate = sampling_rate
        self.nsf_alpha = nsf_alpha
        self.nsf_sigma = nsf_sigma
        self.nsf_voiced_threshold = nsf_voiced_threshold
        self.lrelu_slope = lrelu_slope
        self.audio_limit = audio_limit

        # The list/dict defaults in the signature are created once, when the
        # function is defined, and would otherwise be shared by every instance
        # built with defaults. Storing a deep copy keeps each config
        # independent: mutating one instance's value can no longer leak into
        # the defaults (or into a container the caller still holds).
        self.upsample_rates = copy.deepcopy(upsample_rates)
        self.upsample_kernel_sizes = copy.deepcopy(upsample_kernel_sizes)
        self.istft_params = copy.deepcopy(istft_params)
        self.resblock_kernel_sizes = copy.deepcopy(resblock_kernel_sizes)
        self.resblock_dilation_sizes = copy.deepcopy(resblock_dilation_sizes)
        self.source_resblock_kernel_sizes = copy.deepcopy(source_resblock_kernel_sizes)
        self.source_resblock_dilation_sizes = copy.deepcopy(source_resblock_dilation_sizes)
        self.f0_predictor_config = copy.deepcopy(f0_predictor_config)

    def to_dict(self):
        """
        Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].

        Returns:
            `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
        """
        # Every attribute set in __init__ already lives in ``self.__dict__``,
        # so the deep copy contains all of them. Re-assigning the live
        # attributes on top of the copy (as was done previously) would alias
        # the nested lists/dicts back to the config and defeat the copy.
        return copy.deepcopy(self.__dict__)
|
|
|