added ffmpeg dependency and fixed hf repo url
- app.py +6 -6
- packages.txt +1 -0

app.py CHANGED

@@ -54,8 +54,8 @@ def transcribe_audio(
 
     # Map model choice to HuggingFace repo ID
     model_map = {
-        "MusicNet (Recommended)": "
-        "Synth": "
+        "MusicNet (Recommended)": "Yoni232/countem-musicnet",
+        "Synth": "Yoni232/countem-synth",
     }
     model_name = model_map[model_choice]
 
@@ -70,7 +70,7 @@ def transcribe_audio(
         audio = audio.astype(np.float32) / 2147483648.0
     elif isinstance(audio_input, str):
         # Audio file path provided
-        audio, sr =
+        audio, sr = librosa.load(audio_input, sr=None, mono=False)
     else:
         return None, f"Error: Unexpected audio input type: {type(audio_input)}"
 
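The added line completes the file-path branch: when Gradio passes a path string instead of a (sample_rate, array) tuple, the audio is decoded with librosa at its native sample rate and with channels preserved. A minimal sketch of the normalization pattern this hunk belongs to; the coerce_audio helper name, the int16 branch, and the error handling are illustrative, not code from the Space:

# Sketch only; names and extra branches are assumptions, not the Space's code.
import numpy as np
import librosa

def coerce_audio(audio_input):
    """Return (audio, sr) as float32 samples for either Gradio input shape."""
    if isinstance(audio_input, tuple):
        # gr.Audio with type="numpy" passes (sample_rate, array), typically integer PCM.
        sr, audio = audio_input
        if audio.dtype == np.int16:
            audio = audio.astype(np.float32) / 32768.0
        elif audio.dtype == np.int32:
            audio = audio.astype(np.float32) / 2147483648.0  # 2**31, matches the hunk's int32 scaling
        else:
            audio = audio.astype(np.float32)
        return audio, sr
    if isinstance(audio_input, str):
        # File path: decode at the native sample rate, keep channels.
        # With ffmpeg installed (see packages.txt) compressed formats also load.
        return librosa.load(audio_input, sr=None, mono=False)
    raise TypeError(f"Unexpected audio input type: {type(audio_input)}")
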
@@ -145,8 +145,8 @@ with gr.Blocks(title="CountEM - Music Transcription") as demo:
     **Paper:** [Count the Notes: Histogram-Based Supervision for Automatic Music Transcription](https://arxiv.org/abs/2511.14250) (ISMIR 2025)
 
     **Models on Hugging Face:**
-    - [countem-musicnet](https://huggingface.co/
-    - [countem-synth](https://huggingface.co/
+    - [countem-musicnet](https://huggingface.co/Yoni232/countem-musicnet) - Trained on MusicNet dataset
+    - [countem-synth](https://huggingface.co/Yoni232/countem-synth) - Trained on synthetic data
     """
     )
 
@@ -239,7 +239,7 @@ with gr.Blocks(title="CountEM - Music Transcription") as demo:
 if __name__ == "__main__":
     # Pre-load the default model to speed up first transcription
     print("Pre-loading default model...")
-    load_model("
+    load_model("Yoni232/countem-musicnet")
     print("Model pre-loaded. Starting Gradio interface...")
 
     # Launch the demo
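load_model itself is defined elsewhere in app.py and is not part of this diff. As a rough illustration of what pre-loading a checkpoint from one of the repos above could involve, here is a hypothetical helper; the function name and the "model.pt" filename are guesses, not the Space's actual code:

# Hypothetical sketch only; the Space's real load_model is not shown in this diff.
from functools import lru_cache

import torch
from huggingface_hub import hf_hub_download

@lru_cache(maxsize=2)  # keep both checkpoints cached so switching models is cheap
def load_checkpoint_from_hub(repo_id: str, filename: str = "model.pt"):
    # Download (or reuse from the local HF cache) the checkpoint file,
    # then load it onto CPU; the caller decides where to move it afterwards.
    checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return torch.load(checkpoint_path, map_location="cpu")

# Pre-loading at startup, as the hunk above does, pays the download cost once
# before the first user request arrives.
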
packages.txt ADDED

@@ -0,0 +1 @@
+ffmpeg
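On Hugging Face Spaces, packages.txt lists Debian packages installed with apt before the app starts, so this one-line file is what makes the ffmpeg binary available to librosa's audioread fallback for compressed formats such as mp3 or m4a. An optional startup check, not present in the Space, could make a missing binary obvious:

# Illustrative check, not part of the Space: warn early if ffmpeg is missing.
import shutil

if shutil.which("ffmpeg") is None:
    print("Warning: ffmpeg not found on PATH; compressed audio may fail to decode.")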