File size: 3,666 Bytes
05d6e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import torch


def extract_notes(onsets, frames, velocity, onset_threshold=0.5, frame_threshold=0.5):
    """
    Finds the note timings based on the onsets and frames information

    A note starts at every frame where the thresholded onset activation of a
    bin switches from inactive to active (or is already active in the first
    frame). It lasts for as long as either the thresholded onset or the
    thresholded frame activation of that bin stays active.

    Parameters
    ----------
    onsets: torch.FloatTensor, shape = [frames, bins]
    frames: torch.FloatTensor, shape = [frames, bins]
    velocity: torch.FloatTensor, shape = [frames, bins]
    onset_threshold: float
        onset activations strictly above this value count as active
    frame_threshold: float
        frame activations strictly above this value count as active

    Returns
    -------
    pitches: np.ndarray of bin_indices, shape = [notes]
    intervals: np.ndarray of rows containing (onset_index, offset_index),
        shape = [notes, 2] (also when no note is found); offset_index is
        exclusive and is at most the number of frames
    velocities: np.ndarray of velocity values, shape = [notes]; each value is
        the mean velocity over the frames of the note whose onset is active
    """
    # Binarize once and move to NumPy so the scan below does not pay for a
    # tensor indexing + .item() round trip on every visited frame.
    onset_active = (onsets > onset_threshold).cpu().numpy()
    frame_active = (frames > frame_threshold).cpu().numpy()
    n_frames = onset_active.shape[0]

    # Rising edges of the onset map: active now and inactive one frame earlier.
    # Row 0 has no predecessor, so an active onset there always starts a note.
    onset_start = onset_active.copy()
    onset_start[1:] &= ~onset_active[:-1]

    pitches = []
    intervals = []
    velocities = []

    # np.argwhere walks the map in row-major order: by frame, then by bin.
    for onset, pitch in np.argwhere(onset_start).tolist():
        offset = onset
        velocity_samples = []

        # The onset is active at its own frame, so this loop runs at least
        # once and velocity_samples is never empty afterwards.
        while offset < n_frames and (
            onset_active[offset, pitch] or frame_active[offset, pitch]
        ):
            if onset_active[offset, pitch]:
                velocity_samples.append(velocity[offset, pitch].item())
            offset += 1

        pitches.append(pitch)
        intervals.append([onset, offset])
        velocities.append(np.mean(velocity_samples))

    # Explicit dtypes and the reshape keep the result well-formed when no
    # note was detected: intervals is then (0, 2) instead of (0,).
    return (
        np.array(pitches, dtype=int),
        np.array(intervals, dtype=int).reshape(-1, 2),
        np.array(velocities, dtype=float),
    )


def notes_to_frames(pitches, intervals, shape, mask=None):
    """
    Draws the given notes onto a piano roll and lists the active bins of each frame

    Parameters
    ----------
    pitches: list of pitch bin indices
    intervals: list of [onset, offset] frame index ranges, offset exclusive
    shape: the shape of the piano roll to fill, [n_frames, n_bins]
    mask: optional array multiplied in place into the piano roll before the
        active bins are read out; None leaves the roll untouched

    Returns
    -------
    time: np.ndarray containing the frame indices
    freqs: list of np.ndarray, one per frame, each containing the indices of
        the bins that are nonzero in that frame
    """
    piano_roll = np.zeros(tuple(shape))

    # Mark every frame covered by a note as active for that note's bin.
    for pitch, span in zip(pitches, intervals):
        start, stop = span
        piano_roll[start:stop, pitch] = 1

    if mask is not None:
        piano_roll *= mask

    n_frames = piano_roll.shape[0]
    time = np.arange(n_frames)
    freqs = [np.nonzero(piano_roll[frame, :])[0] for frame in range(n_frames)]
    return time, freqs