"""HARP/Gradio app exposing BeatNet beat detection as a pyharp endpoint."""

import logging
import os
import sys
import uuid
from contextlib import contextmanager
from pathlib import Path

import gradio as gr
import librosa
import numpy as np
import torch
from audiotools import AudioSignal
from BeatNet.BeatNet import BeatNet
from pyharp import AudioLabel, LabelList, OutputLabel
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import save_audio

# NOTE(review): uuid, Path, contextmanager, numpy, torch, librosa,
# save_audio, and AudioSignal are never referenced below — they look like
# leftovers from an earlier (watermarking?) app. Kept to avoid breaking any
# external consumer; candidates for removal after confirming nothing else
# imports them from this module. (BUGFIX: `sys` was imported twice.)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    handlers=[logging.StreamHandler(sys.stderr)],
)
log = logging.getLogger("app")

# NOTE(review): these constants are never used in this file — presumably
# leftovers from a watermarking app. Kept for backward compatibility.
LOUDNESS_DB = -16.
SAMPLE_RATE = 48_000
ENCODEC_SAMPLE_RATE = 16_000
AUDIOSEAL_SAMPLE_RATE = 16_000

# Model card shown by HARP clients for this endpoint.
model_card = ModelCard(
    name="BeatNet Beat Detection",
    description=("Beat detection for audio."),
    author="Mojtaba Heydari, Frank Cwitkowitz, Zhiyao Duan",
    tags=["beat detection"]
)

log.info("Initializing BeatNet model...")
# Offline DBN inference, no plotting, no background thread.
estimator = BeatNet(1, mode="offline", inference_model="DBN", plot=[], thread=False)


def process_fn(inp_audio):
    """Run BeatNet over ``inp_audio`` and return beat labels.

    Parameters
    ----------
    inp_audio :
        Audio input as received from the ``gr.Audio(type="filepath")``
        component, i.e. a path to the uploaded/recorded file.

    Returns
    -------
    tuple
        ``(inp_audio, output_labels)`` — the input passed through
        unchanged, plus a ``LabelList`` with one ``AudioLabel`` per
        detected beat. Each estimator row is assumed to be
        ``(time_seconds, beat_position_in_bar)`` — TODO confirm against
        BeatNet docs; position ``1.0`` (downbeat) is given full amplitude.
    """
    output = estimator.process(inp_audio)

    output_labels = LabelList()
    for t, b in output:
        output_labels.labels.append(
            AudioLabel(
                t = t,
                label = f"{b}",
                description = f"Beat: {b}",
                color = OutputLabel.rgb_color_to_int(0, 164, 235),
                # Emphasize downbeats (position 1.0) visually.
                amplitude = 1.0 if b == 1.0 else 0.0
            )
        )

    return inp_audio, output_labels


with gr.Blocks() as app:
    gr.Markdown("## BeatNet Beat Detection")

    # Inputs
    input_audio = gr.Audio(
        label="Input Audio",
        type="filepath",
        sources=["upload", "microphone"]
    )

    # Outputs
    # BUGFIX: labels previously read "Watermarked Speech" and
    # "Watermark Confidence" — copy-paste leftovers from a watermarking
    # app; this endpoint passes audio through and emits beat labels.
    output_wav = gr.Audio(
        type="filepath",
        label="Output Audio"
    )
    output_label = gr.JSON(label="Beat Labels")

    _ = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio
        ],
        output_components=[
            output_wav,
            output_label
        ],
        process_fn=process_fn
    )


if __name__ == '__main__':
    # Bind on all interfaces; port is overridable via the PORT env var.
    app.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))