added ffmpeg dependency and fixed hf repo url
- app.py +6 -6
- packages.txt +1 -0

app.py CHANGED

@@ -54,8 +54,8 @@ def transcribe_audio(
 
     # Map model choice to HuggingFace repo ID
     model_map = {
-        "MusicNet (Recommended)": "
-        "Synth": "
+        "MusicNet (Recommended)": "Yoni232/countem-musicnet",
+        "Synth": "Yoni232/countem-synth",
     }
     model_name = model_map[model_choice]
 
@@ -70,7 +70,7 @@ def transcribe_audio(
         audio = audio.astype(np.float32) / 2147483648.0
     elif isinstance(audio_input, str):
         # Audio file path provided
-        audio, sr =
+        audio, sr = librosa.load(audio_input, sr=None, mono=False)
     else:
         return None, f"Error: Unexpected audio input type: {type(audio_input)}"
 
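The added line completes the file-path branch: when Gradio passes a path string instead of a (sample_rate, array) tuple, the audio is decoded with librosa at its native sample rate and with channels preserved. A minimal sketch of the normalization pattern this hunk belongs to; the coerce_audio helper name, the int16 branch, and the error handling are illustrative, not code from the Space:

# Sketch only; names and extra branches are assumptions, not the Space's code.
import numpy as np
import librosa

def coerce_audio(audio_input):
    """Return (audio, sr) as float32 samples for either Gradio input shape."""
    if isinstance(audio_input, tuple):
        # gr.Audio with type="numpy" passes (sample_rate, array), typically integer PCM.
        sr, audio = audio_input
        if audio.dtype == np.int16:
            audio = audio.astype(np.float32) / 32768.0
        elif audio.dtype == np.int32:
            audio = audio.astype(np.float32) / 2147483648.0  # 2**31, matches the hunk's int32 scaling
        else:
            audio = audio.astype(np.float32)
        return audio, sr
    if isinstance(audio_input, str):
        # File path: decode at the native sample rate, keep channels.
        # With ffmpeg installed (see packages.txt) compressed formats also load.
        return librosa.load(audio_input, sr=None, mono=False)
    raise TypeError(f"Unexpected audio input type: {type(audio_input)}")
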
@@ -145,8 +145,8 @@ with gr.Blocks(title="CountEM - Music Transcription") as demo:
     **Paper:** [Count the Notes: Histogram-Based Supervision for Automatic Music Transcription](https://arxiv.org/abs/2511.14250) (ISMIR 2025)
 
     **Models on Hugging Face:**
-    - [countem-musicnet](https://huggingface.co/
-    - [countem-synth](https://huggingface.co/
+    - [countem-musicnet](https://huggingface.co/Yoni232/countem-musicnet) - Trained on MusicNet dataset
+    - [countem-synth](https://huggingface.co/Yoni232/countem-synth) - Trained on synthetic data
     """
     )
 
@@ -239,7 +239,7 @@ with gr.Blocks(title="CountEM - Music Transcription") as demo:
 if __name__ == "__main__":
     # Pre-load the default model to speed up first transcription
     print("Pre-loading default model...")
-    load_model("
+    load_model("Yoni232/countem-musicnet")
     print("Model pre-loaded. Starting Gradio interface...")
 
     # Launch the demo
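load_model itself is defined elsewhere in app.py and is not part of this diff. As a rough illustration of what pre-loading a checkpoint from one of the repos above could involve, here is a hypothetical helper; the function name and the "model.pt" filename are guesses, not the Space's actual code:

# Hypothetical sketch only; the Space's real load_model is not shown in this diff.
from functools import lru_cache

import torch
from huggingface_hub import hf_hub_download

@lru_cache(maxsize=2)  # keep both checkpoints cached so switching models is cheap
def load_checkpoint_from_hub(repo_id: str, filename: str = "model.pt"):
    # Download (or reuse from the local HF cache) the checkpoint file,
    # then load it onto CPU; the caller decides where to move it afterwards.
    checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return torch.load(checkpoint_path, map_location="cpu")

# Pre-loading at startup, as the hunk above does, pays the download cost once
# before the first user request arrives.
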
packages.txt ADDED

@@ -0,0 +1 @@
+ffmpeg
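On Hugging Face Spaces, packages.txt lists Debian packages installed with apt before the app starts, so this one-line file is what makes the ffmpeg binary available to librosa's audioread fallback for compressed formats such as mp3 or m4a. An optional startup check, not present in the Space, could make a missing binary obvious:

# Illustrative check, not part of the Space: warn early if ffmpeg is missing.
import shutil

if shutil.which("ffmpeg") is None:
    print("Warning: ffmpeg not found on PATH; compressed audio may fail to decode.")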