# BeatNet / app.py
# HF Space by ellagranger — commit 384d531 ("Docker output fix & BeatNet fix")
import os
import sys
import uuid
from pathlib import Path
from contextlib import contextmanager
import numpy as np
import torch
import gradio as gr
import librosa
from BeatNet.BeatNet import BeatNet
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import save_audio
from pyharp import LabelList, AudioLabel, OutputLabel
from audiotools import AudioSignal
import logging, sys
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(message)s",
handlers=[logging.StreamHandler(sys.stderr)],
)
log = logging.getLogger("app")
# Audio constants.
# NOTE(review): none of these are referenced anywhere in this file — they
# look like leftovers from a watermarking template (EnCodec / AudioSeal,
# matching the "Watermarked Speech" output labels below); confirm with the
# template before removing.
LOUDNESS_DB = -16.
SAMPLE_RATE = 48_000
ENCODEC_SAMPLE_RATE = 16_000
AUDIOSEAL_SAMPLE_RATE = 16_000
# HARP model card advertised to clients that discover this endpoint.
model_card = ModelCard(
name="BeatNet Beat Detection",
description=("Beat detection for audio."),
author="Mojtaba Heydari, Frank Cwitkowitz, Zhiyao Duan",
tags=["beat detection"]
)
log.info("Initializing BeatNet model...")
# Loaded once at module import: offline mode, DBN inference, no plotting,
# single-threaded. Loading here avoids re-initializing per request.
estimator = BeatNet(1, mode="offline", inference_model="DBN", plot=[], thread=False)
def process_fn(inp_audio):
    """Run BeatNet on the input file and return it with per-beat labels.

    Args:
        inp_audio: path of the audio file to analyse.

    Returns:
        A ``(inp_audio, LabelList)`` pair: the unchanged input path plus one
        ``AudioLabel`` per detected beat (time and beat number in the bar).
    """
    beats = estimator.process(inp_audio)
    labels = LabelList()
    # Same color for every beat marker; compute the int encoding once.
    beat_color = OutputLabel.rgb_color_to_int(0, 164, 235)
    for beat_time, beat_num in beats:
        # Downbeats (beat 1 of the bar) get full amplitude, others zero.
        is_downbeat = beat_num == 1.0
        labels.labels.append(
            AudioLabel(
                t=beat_time,
                label=f"{beat_num}",
                description=f"Beat: {beat_num}",
                color=beat_color,
                amplitude=1.0 if is_downbeat else 0.0,
            )
        )
    return inp_audio, labels
# Gradio UI + HARP endpoint wiring.
with gr.Blocks() as app:
    gr.Markdown("## BeatNet Beat Detection")

    # Inputs
    input_audio = gr.Audio(
        label="Input Audio",
        type="filepath",
        sources=["upload", "microphone"]
    )

    # Outputs: the audio is passed through unchanged; the labels carry the
    # detected beats. The previous labels ("Watermarked Speech" /
    # "Watermark Confidence") were leftovers from a watermarking template
    # and did not describe this app.
    output_wav = gr.Audio(
        type="filepath",
        label="Output Audio"
    )
    output_label = gr.JSON(label="Beat Labels")

    _ = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio
        ],
        output_components=[
            output_wav,
            output_label
        ],
        process_fn=process_fn
    )
if __name__ == '__main__':
    # Bind all interfaces; the port comes from the PORT env var (HF Spaces
    # sets it) with 7860 as the Gradio default.
    port = int(os.getenv("PORT", "7860"))
    app.launch(server_name="0.0.0.0", server_port=port)