File size: 3,820 Bytes
798e88a
959c235
93c3ca9
798e88a
 
 
 
93c3ca9
 
 
798e88a
 
 
 
 
 
 
93c3ca9
798e88a
 
 
 
 
 
93c3ca9
798e88a
93c3ca9
 
798e88a
93c3ca9
798e88a
93c3ca9
 
798e88a
 
 
 
93c3ca9
 
798e88a
 
 
93c3ca9
 
798e88a
 
 
 
 
 
 
 
93c3ca9
 
798e88a
93c3ca9
 
798e88a
 
 
 
 
 
93c3ca9
204c40e
798e88a
 
 
 
 
74bdbea
 
93c3ca9
 
 
 
 
 
 
 
74bdbea
 
 
 
 
 
93c3ca9
 
74bdbea
 
c8f49bb
 
93c3ca9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import gradio as gr

import numpy as np
import soundfile as sf
from scipy import signal

def extract_phantom_center_test(input_file, output_format, rdf=0.99999):
    """Split a stereo file into a phantom-center stem and an "other" stem.

    The center estimate is built in the STFT domain: for every
    time-frequency bin it combines the smaller of the left/right magnitudes
    with the phase of the mono ``(L + R) / 2`` mix. That estimate, scaled by
    ``rdf``, is subtracted from each channel to form the "other" stem.

    Args:
        input_file: Audio file to read with ``soundfile.read``.
        output_format: Extension used for both output files
            (e.g. ``"flac"`` or ``"wav"``).
        rdf: Linear factor applied to the center estimate before it is
            subtracted from the left and right channels.

    Returns:
        Tuple ``(other_path, center_path)`` with the names of the two files
        that were written (``other.<fmt>`` and ``center.<fmt>``).

    Raises:
        ValueError: If the input is not a two-channel signal or contains no
            samples.
    """
    output_file = f"other.{output_format}"
    output_center_file = f"center.{output_format}"
    data, samplerate = sf.read(input_file)

    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Требуется стереофайл (2 канала)")

    n_samples = data.shape[0]
    if n_samples == 0:
        raise ValueError("Input audio contains no samples")

    left = data[:, 0]
    right = data[:, 1]
    mono = np.mean(data, axis=1)

    # One-second analysis window with 50% overlap. The window is clamped to
    # the signal length: scipy's stft silently shrinks an over-long window,
    # after which the fixed noverlap / the istft call (still given the
    # original nperseg) would raise for clips shorter than one second.
    nperseg = min(samplerate, n_samples)
    noverlap = nperseg // 2
    stft_kwargs = dict(fs=samplerate, nperseg=nperseg, noverlap=noverlap)

    _, _, Z_left = signal.stft(left, **stft_kwargs)
    _, _, Z_right = signal.stft(right, **stft_kwargs)
    _, _, Z_mono = signal.stft(mono, **stft_kwargs)

    # Center estimate: per-bin minimum magnitude of the two channels, carried
    # on the phase of the mono mix. It is the same for both channels, so it
    # is computed (and inverted) only once.
    Z_center = np.minimum(np.abs(Z_left), np.abs(Z_right)) * np.exp(1j * np.angle(Z_mono))

    Z_new_left = Z_left - Z_center * rdf
    Z_new_right = Z_right - Z_center * rdf

    _, new_left = signal.istft(Z_new_left, **stft_kwargs)
    _, new_right = signal.istft(Z_new_right, **stft_kwargs)
    _, center = signal.istft(Z_center, **stft_kwargs)

    # istft may return extra trailing samples; trim back to the input length.
    new_left = new_left[:n_samples]
    new_right = new_right[:n_samples]
    center = center[:n_samples]

    # Scale the "other" stem down only if it would exceed full scale.
    peak = max(np.abs(new_left).max(), np.abs(new_right).max())
    if peak > 1.0:
        new_left = new_left / peak
        new_right = new_right / peak

    sf.write(output_file, np.column_stack((new_left, new_right)), samplerate)
    # The mono phantom center is written identically to both channels.
    sf.write(output_center_file, np.column_stack((center, center)), samplerate)

    return (output_file, output_center_file)

# Gradio front end: upload an audio file, choose an export format, and get
# the "Phantom Center" and "Other" results back as two audio players.
with gr.Blocks(title="Phantom Center Extraction", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Phantom Center Extraction")
    with gr.Row():
        input_audio = gr.Audio(label="Upload audio", type="filepath")
    with gr.Row():
        # Hidden control (visible=False): its value, 1 by default, is still
        # passed to the processing function as the third argument (`rdf`).
        reduction_f = gr.Slider(0.1, 10, value=1, step=0.1, label="Reduction dB", interactive=True, visible=False)
    with gr.Row():
        # Named `export_format` rather than `format` so the builtin
        # `format()` is not shadowed at module level.
        export_format = gr.Dropdown(
            ["flac", "wav"],
            value="flac",
            label="Export format",
        )
    with gr.Row():
        extract_btn = gr.Button("Separate")
    with gr.Row():
        mid = gr.Audio(type="filepath", interactive=False, label="Phantom Center", visible=True)
        side = gr.Audio(type="filepath", interactive=False, label="Other", visible=True)
    # The processing function returns (other, center), hence [side, mid].
    extract_btn.click(
        fn=extract_phantom_center_test,
        inputs=[input_audio, export_format, reduction_f],
        outputs=[side, mid],
    )

if __name__ == "__main__":
    # Listen on all network interfaces rather than only on localhost.
    demo.launch(server_name="0.0.0.0")