import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from librosa.filters import mel
from scipy import signal
from scipy.fftpack import fft


class Audio:
    """
    Audio class which holds music data and timestamps for notes.

    Args:
        data: audio data as a numpy array.
        samplerate: sampling rate of the audio.
        stereo: True or False; whether you have a don/ka stereo file or not. Normally True.

    Variables:
        timestamp: list of note timestamps; (time, note type) tuples once notes are loaded.

    Example:
        >>> from music_processor import *
        >>> data, samplerate = sf.read(filename)
        >>> song = Audio(data, samplerate)
        >>> # to get audio data
        >>> song.data
        >>> # to import .tja files:
        >>> song.import_tja(filename)
        >>> # to get data converted
        >>> song.data = (song.data[:, 0] + song.data[:, 1]) / 2
        >>> fft_and_melscale(song, include_zero_cross=False)
    """

    def __init__(self, data, samplerate, stereo=True):
        self.data = data
        self.samplerate = samplerate
        if stereo is False:
            # mix the two channels down to mono
            self.data = (self.data[:, 0] + self.data[:, 1]) / 2
        self.timestamp = []

    def plotaudio(self, start_t, stop_t):
        """Plot the first channel between sample indices start_t and stop_t."""
        plt.plot(np.linspace(start_t, stop_t, stop_t - start_t), self.data[start_t:stop_t, 0])
        plt.show()

    def save(self, filename, start_t=0, stop_t=None):
        """Write the audio (or a slice of it) to filename."""
        if stop_t is None:
            stop_t = self.data.shape[0]
        sf.write(filename, self.data[start_t:stop_t], self.samplerate)

    def synthesize(self, diff=True, don="./asset/don.wav", ka="./asset/ka.wav"):
        """Mix don/ka drum hits into the audio at every note timestamp."""
        donsound = sf.read(don)[0]
        donsound = (donsound[:, 0] + donsound[:, 1]) / 2  # mix down to mono
        kasound = sf.read(ka)[0]
        kasound = (kasound[:, 0] + kasound[:, 1]) / 2
        donlen = len(donsound)
        kalen = len(kasound)

        if diff is True:
            # timestamps are (time, note type) tuples; types 1/3/5/6/7 are don, 2/4 are ka
            for stamp in self.timestamp:
                timing = int(stamp[0] * self.samplerate)
                try:
                    if stamp[1] in (1, 3, 5, 6, 7):
                        self.data[timing : timing + donlen] += donsound
                    elif stamp[1] in (2, 4):
                        self.data[timing : timing + kalen] += kasound
                except ValueError:
                    pass  # note runs past the end of the audio; skip it

        elif diff == "don":
            for stamp in self.timestamp:
                # timestamps may be bare times or (time, note type) tuples
                time = stamp[0] if isinstance(stamp, tuple) else stamp
                timing = int(time * self.samplerate)
                if timing + donlen < self.data.shape[0]:
                    self.data[timing : timing + donlen] += donsound

        elif diff == "ka":
            for stamp in self.timestamp:
                time = stamp[0] if isinstance(stamp, tuple) else stamp
                timing = int(time * self.samplerate)
                if timing + kalen < self.data.shape[0]:
                    self.data[timing : timing + kalen] += kasound
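
# Example usage of synthesize (a minimal sketch; "./song.wav", the output path,
# and the timestamps below are hypothetical, not part of this module):
#
#     data, samplerate = sf.read("./song.wav")
#     song = Audio(data, samplerate, stereo=False)  # mix down to mono first
#     song.timestamp = [(1.0, 1), (1.5, 2)]         # (seconds, note type): 1 = don, 2 = ka
#     song.synthesize(diff=True)                    # overlay the drum hits
#     song.save("./song_with_notes.wav")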


def make_frame(data, nhop, nfft):
    """
    Helper function for fft_and_melscale.
    To use short time slices as training data, returns an array of nfft-sized
    windows cut from data, sliding by nhop (512) samples at a time.
    """
    length = data.shape[0]
    framedata = np.concatenate((data, np.zeros(nfft)))  # zero padding so the last windows fit
    return np.array([framedata[i * nhop : i * nhop + nfft] for i in range(length // nhop)])
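
# A quick shape check for make_frame (an illustrative sketch, not part of the
# original module): one second of audio at 44100 Hz with a hop of 512 yields
# 44100 // 512 == 86 frames, each nfft samples long.
#
#     x = np.random.randn(44100)
#     frames = make_frame(x, nhop=512, nfft=1024)
#     print(frames.shape)   # (86, 1024)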

# @jit
def fft_and_melscale(
    song,
    nhop=512,
    nffts=(1024, 2048, 4096),
    mel_nband=80,
    mel_freqlo=27.5,
    mel_freqhi=16000.0,
    include_zero_cross=False,
):
    """
    FFT and melscale method.
    fft: nffts = (1024, 2048, 4096); extracts np.arrays from the data while
    varying the window length and applies the fast Fourier transform.
    melscale: reduces the frequency dimension and takes the log10 of the values.
    """
    feat_channels = []

    for nfft in nffts:
        window = signal.windows.blackmanharris(nfft)
        filt = mel(
            sr=song.samplerate,
            n_fft=nfft,
            n_mels=mel_nband,
            fmin=mel_freqlo,
            fmax=mel_freqhi,
        )

        # cut the song into overlapping nfft-sized frames
        frame = make_frame(song.data, nhop, nfft)

        # melscaling: keep the positive frequencies, push the power spectrum
        # through the mel filter bank, then log-compress
        processedframe = fft(window * frame)[:, : nfft // 2 + 1]
        processedframe = np.dot(filt, np.transpose(np.abs(processedframe) ** 2))
        processedframe = 20 * np.log10(processedframe + 0.1)  # +0.1 avoids log(0)

        feat_channels.append(processedframe)

    if include_zero_cross:
        song.zero_crossing = np.where(np.diff(np.sign(song.data)))[0]
        print(song.zero_crossing)

    return np.array(feat_channels)
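

# End-to-end sketch (illustrative; "./song.wav" is a hypothetical path, and this
# __main__ block is not part of the original pipeline):
if __name__ == "__main__":
    data, samplerate = sf.read("./song.wav")
    song = Audio(data, samplerate, stereo=False)  # make_frame expects mono data
    feats = fft_and_melscale(song)
    # one feature channel per nfft: (len(nffts), mel_nband, n_frames)
    print(feats.shape)  # e.g. (3, 80, len(song.data) // 512)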