File size: 3,820 Bytes
798e88a 959c235 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 798e88a 93c3ca9 204c40e 798e88a 74bdbea 93c3ca9 74bdbea 93c3ca9 74bdbea c8f49bb 93c3ca9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import os
import gradio as gr
import numpy as np
import soundfile as sf
from scipy import signal
def _separate_center(data, samplerate, rdf):
    """Split stereo samples into the residual channels and a mono center.

    The center spectrum takes, per time-frequency bin, the smaller of the
    left/right magnitudes and the phase of the mono (L+R)/2 mix. The residual
    ("other") channels are the original spectra minus ``rdf`` times that
    center spectrum.

    Args:
        data: Array of shape (n_samples, 2) holding left and right samples.
        samplerate: Sampling rate in Hz; also the nominal STFT window length.
        rdf: Linear factor applied to the center before it is subtracted.

    Returns:
        Tuple ``(other_left, other_right, center)`` of time-domain arrays,
        each trimmed to ``n_samples``.
    """
    n_samples = data.shape[0]

    # One-second analysis window with 50% overlap. The window is capped at the
    # clip length so that the forward and inverse transforms always use the
    # same, valid segment length (clips shorter than one second used to fail).
    nperseg = min(int(samplerate), n_samples)
    noverlap = nperseg // 2
    stft_args = {"fs": samplerate, "nperseg": nperseg, "noverlap": noverlap}

    _, _, spec_left = signal.stft(data[:, 0], **stft_args)
    _, _, spec_right = signal.stft(data[:, 1], **stft_args)
    _, _, spec_mono = signal.stft(np.mean(data, axis=1), **stft_args)

    # The center estimate is identical for both channels, so build it once.
    center_magnitude = np.minimum(np.abs(spec_left), np.abs(spec_right))
    spec_center = center_magnitude * np.exp(1j * np.angle(spec_mono))

    _, other_left = signal.istft(spec_left - spec_center * rdf, **stft_args)
    _, other_right = signal.istft(spec_right - spec_center * rdf, **stft_args)
    _, center = signal.istft(spec_center, **stft_args)

    # istft may return extra trailing samples; cut back to the input length.
    return other_left[:n_samples], other_right[:n_samples], center[:n_samples]


def extract_phantom_center_test(input_file, output_format, rdf=0.99999):
    """Separate a stereo file into a phantom-center file and an "other" file.

    Reads ``input_file``, estimates the content common to both channels in
    the STFT domain, and writes two stereo files into the current working
    directory: ``other.<output_format>`` (original minus the scaled center)
    and ``center.<output_format>`` (the mono center duplicated to both
    channels).

    Args:
        input_file: Path of the audio file to read.
        output_format: File extension used for both outputs (e.g. "flac").
        rdf: Linear factor applied to the center before subtraction.

    Returns:
        Tuple ``(output_file, output_center_file)`` with the two paths written.

    Raises:
        ValueError: If no input file is given, the audio is not two-channel,
            or it contains no samples.
    """
    if input_file is None:
        raise ValueError("No input audio file was provided")

    # NOTE(review): output names are fixed and relative to the working
    # directory, so overlapping calls would overwrite each other's results.
    output_file = f"other.{output_format}"
    output_center_file = f"center.{output_format}"

    data, samplerate = sf.read(input_file)
    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Требуется стереофайл (2 канала)")
    if data.shape[0] == 0:
        raise ValueError("Input audio file contains no samples")

    other_left, other_right, center = _separate_center(data, samplerate, rdf)

    # Scale the residual down only when it would exceed full scale; both
    # channels share one gain so the stereo balance is preserved.
    peak = max(np.abs(other_left).max(), np.abs(other_right).max())
    if peak > 1.0:
        other_left = other_left / peak
        other_right = other_right / peak

    sf.write(output_file, np.column_stack((other_left, other_right)), samplerate)
    # The mono phantom center goes to both channels of the center file.
    sf.write(output_center_file, np.column_stack((center, center)), samplerate)
    return (output_file, output_center_file)
# Gradio front end: upload a stereo file, choose the export format, and get
# back the phantom-center and "other" stems produced by
# extract_phantom_center_test.
with gr.Blocks(title="Phantom Center Extraction", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Phantom Center Extraction")
    with gr.Row():
        input_audio = gr.Audio(label="Upload audio", type="filepath")
    with gr.Row():
        # Hidden control; its value (1 by default) is passed to the function
        # as the `rdf` factor.
        # NOTE(review): the label says "dB" but the value is used as a linear
        # multiplier - confirm the intended unit before making this visible.
        reduction_f = gr.Slider(0.1, 10, value=1, step=0.1, label="Reduction dB", interactive=True, visible=False)
    with gr.Row():
        # Named export_format rather than `format` so the builtin is not shadowed.
        export_format = gr.Dropdown(
            ["flac", "wav"],
            value="flac",
            label="Export format",
        )
    with gr.Row():
        extract_btn = gr.Button("Separate")
    with gr.Row():
        mid = gr.Audio(type="filepath", interactive=False, label="Phantom Center", visible=True)
        side = gr.Audio(type="filepath", interactive=False, label="Other", visible=True)
    # The function returns (other_path, center_path), hence [side, mid].
    extract_btn.click(
        fn=extract_phantom_center_test,
        inputs=[input_audio, export_format, reduction_f],
        outputs=[side, mid],
    )

if __name__ == "__main__":
    # Listen on all interfaces so the app is reachable from outside the host.
    demo.launch(server_name="0.0.0.0")
|