sensenova
/

InteractiveOmni-8B

interactiveomni

Model card Files Files and versions

InteractiveOmni-8B / configuration_hifigan.py

tongww's picture

upload initial model

4cffcdc verified 9 days ago

history blame contribute delete

3.54 kB

	# --------------------------------------------------------
	# SenseTime
	# Copyright (c) 2025 SenseTime
	# Licensed under The MIT License [see LICENSE for details]
	# --------------------------------------------------------
	import copy

	from transformers.configuration_utils import PretrainedConfig
	from transformers.utils import logging

	# Module-level logger named after this module, created through the
	# transformers logging utilities imported above.
	logger = logging.get_logger(__name__)

	class HiFiGanConfig(PretrainedConfig):
	    """Hyper-parameter container for the HiFi-GAN part of the model.

	    Every argument is stored unchanged as an attribute of the same name; no
	    validation is performed here. Remaining keyword arguments are forwarded
	    to `PretrainedConfig.__init__`.

	    NOTE(review): the one-line hints below are inferred from the parameter
	    names only -- the code that consumes this config is not part of this
	    file, so confirm them there before relying on them.

	    Args:
	        in_channels: Defaults to 80. Presumably the number of input
	            feature channels.
	        base_channels: Defaults to 512.
	        nb_harmonics: Defaults to 8.
	        sampling_rate: Defaults to 24000. Presumably in Hz.
	        nsf_alpha: Defaults to 0.1.
	        nsf_sigma: Defaults to 0.003.
	        nsf_voiced_threshold: Defaults to 10.
	        upsample_rates: Defaults to `[8, 5, 3]`.
	        upsample_kernel_sizes: Defaults to `[16, 11, 7]`.
	        istft_params: Defaults to `{'n_fft': 16, 'hop_len': 4}`.
	        resblock_kernel_sizes: Defaults to `[3, 7, 11]`.
	        resblock_dilation_sizes: Defaults to
	            `[[1, 3, 5], [1, 3, 5], [1, 3, 5]]`.
	        source_resblock_kernel_sizes: Defaults to `[7, 7, 11]`.
	        source_resblock_dilation_sizes: Defaults to
	            `[[1, 3, 5], [1, 3, 5], [1, 3, 5]]`.
	        lrelu_slope: Defaults to 0.1.
	        audio_limit: Defaults to 0.99.
	        f0_predictor_config: Defaults to
	            `{'num_class': 1, 'in_channels': 80, 'cond_channels': 512}`.
	        **kwargs: Passed through to `PretrainedConfig.__init__`.

	    For the list- and dict-valued arguments, passing `None` (or omitting the
	    argument) selects the default shown above. A new object is built on
	    every call, so instances never share their default containers.
	    """

	    def __init__(
	        self,
	        in_channels=80,
	        base_channels=512,
	        nb_harmonics=8,
	        sampling_rate=24000,
	        nsf_alpha=0.1,
	        nsf_sigma=0.003,
	        nsf_voiced_threshold=10,
	        upsample_rates=None,
	        upsample_kernel_sizes=None,
	        istft_params=None,
	        resblock_kernel_sizes=None,
	        resblock_dilation_sizes=None,
	        source_resblock_kernel_sizes=None,
	        source_resblock_dilation_sizes=None,
	        lrelu_slope=0.1,
	        audio_limit=0.99,
	        f0_predictor_config=None,
	        **kwargs,
	    ):
	        super().__init__(**kwargs)

	        # Container defaults are created inside the call rather than in the
	        # signature: a literal in the signature is evaluated once and would
	        # be shared (and mutable) across every instance that relies on it.
	        if upsample_rates is None:
	            upsample_rates = [8, 5, 3]
	        if upsample_kernel_sizes is None:
	            upsample_kernel_sizes = [16, 11, 7]
	        if istft_params is None:
	            istft_params = {'n_fft': 16, 'hop_len': 4}
	        if resblock_kernel_sizes is None:
	            resblock_kernel_sizes = [3, 7, 11]
	        if resblock_dilation_sizes is None:
	            resblock_dilation_sizes = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
	        if source_resblock_kernel_sizes is None:
	            source_resblock_kernel_sizes = [7, 7, 11]
	        if source_resblock_dilation_sizes is None:
	            source_resblock_dilation_sizes = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
	        if f0_predictor_config is None:
	            f0_predictor_config = {
	                'num_class': 1,
	                'in_channels': 80,
	                'cond_channels': 512,
	            }

	        self.in_channels = in_channels
	        self.base_channels = base_channels
	        self.nb_harmonics = nb_harmonics
	        self.sampling_rate = sampling_rate
	        self.nsf_alpha = nsf_alpha
	        self.nsf_sigma = nsf_sigma
	        self.nsf_voiced_threshold = nsf_voiced_threshold
	        self.upsample_rates = upsample_rates
	        self.upsample_kernel_sizes = upsample_kernel_sizes
	        self.istft_params = istft_params
	        self.resblock_kernel_sizes = resblock_kernel_sizes
	        self.resblock_dilation_sizes = resblock_dilation_sizes
	        self.source_resblock_kernel_sizes = source_resblock_kernel_sizes
	        self.source_resblock_dilation_sizes = source_resblock_dilation_sizes
	        self.lrelu_slope = lrelu_slope
	        self.audio_limit = audio_limit
	        self.f0_predictor_config = f0_predictor_config

	    def to_dict(self):
	        """
	        Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].

	        This override does not delegate to the base implementation; the
	        result is a deep copy of the instance's `__dict__`.

	        Returns:
	            `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance,
	            fully detached from the instance (nested lists and dicts are copies).
	        """
	        # The deep copy already holds every attribute assigned in __init__,
	        # including `source_resblock_kernel_sizes`. Writing the values back
	        # from `self` afterwards would re-insert live references and let
	        # callers mutate the config through the returned dictionary.
	        return copy.deepcopy(self.__dict__)