Spaces: Bambara ASR demo (Hugging Face Space — status at capture time: Runtime error)
| import os | |
| import spaces | |
| import torch | |
| import torchaudio | |
| import gradio as gr | |
| import logging | |
| from whosper import WhosperTranscriber | |
# Configure root logging once at import time so all module loggers emit INFO+.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the best available inference device: CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
    logger.info("Using CUDA for inference.")
elif torch.backends.mps.is_available():
    device = "mps"
    logger.info("Using MPS for inference.")
else:
    device = "cpu"
    logger.info("Using CPU for inference.")

# NOTE(review): `device` is computed above but never passed to
# WhosperTranscriber — presumably the transcriber picks its own device
# internally; confirm against the whosper API.
model_id = "sudoping01/bambara-asr-v2"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info(f"Transcriber initialized with model: {model_id}")
def resample_audio(audio_path, target_sample_rate=16000):
    """
    Load an audio file and resample it to the target sampling rate.

    Args:
        audio_path (str): Path to the audio file.
        target_sample_rate (int): Desired sample rate in Hz (default 16000,
            the rate the ASR model expects).

    Returns:
        tuple: ``(waveform, target_sample_rate)`` where ``waveform`` is a
        torch.Tensor, resampled only when the file's native rate differs.

    Raises:
        Exception: Any error from loading or resampling is logged and
        re-raised to the caller.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)
        # Resampling is skipped when the file is already at the target rate.
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate,
            )
            waveform = resampler(waveform)
        return waveform, target_sample_rate
    except Exception as e:
        logger.error(f"Error resampling audio: {e}")
        # Bare `raise` re-raises the active exception with its original
        # traceback intact (`raise e` is redundant and less idiomatic).
        raise
def transcribe_audio(audio_file):
    """
    Transcribe a Bambara audio clip to text using the module-level transcriber.

    Args:
        audio_file: Filesystem path to the audio clip, or None when the user
            has not supplied anything.

    Returns:
        str: The transcribed Bambara text on success, otherwise a
        user-facing message describing the problem.
    """
    # Guard clause: nothing to do without an input file.
    if audio_file is None:
        return "Please provide an audio file for transcription."
    try:
        logger.info(f"Transcribing audio file: {audio_file}")
        transcription = transcriber.transcribe_audio(audio_file)
        logger.info("Transcription successful.")
        # Fall back to an empty string when no "text" field is present.
        return transcription.get("text", "")
    except Exception as e:
        logger.error(f"Transcription failed: {e}")
        return f"Error during transcription: {str(e)}"
def get_example_files(directory="./examples"):
    """
    Return up to five audio files from the examples directory.

    Args:
        directory (str): The directory to search for audio files.

    Returns:
        list: Absolute paths of at most 5 audio files, sorted by filename.
        Empty when the directory is missing or unreadable.
    """
    if not os.path.exists(directory):
        logger.warning(f"Examples directory {directory} not found.")
        return []
    audio_extensions = ('.wav', '.mp3', '.m4a', '.flac', '.ogg')
    audio_files = []
    try:
        # Sort for a deterministic listing: os.listdir order is arbitrary
        # and platform-dependent, which would make the [:5] selection of
        # examples change between runs/hosts.
        for file in sorted(os.listdir(directory)):
            # str.endswith accepts a tuple — one call covers all extensions.
            if file.lower().endswith(audio_extensions):
                audio_files.append(os.path.abspath(os.path.join(directory, file)))
        logger.info(f"Found {len(audio_files)} example audio files.")
        return audio_files[:5]
    except Exception as e:
        logger.error(f"Error reading examples directory: {e}")
        return []
def build_interface():
    """
    Build the Gradio Blocks interface for Bambara speech recognition.

    Returns:
        gr.Blocks: The assembled (unlaunched) Gradio app.
    """
    example_files = get_example_files()
    with gr.Blocks(title="Bambara Speech Recognition") as demo:
        gr.Markdown(
            """
            # 🎤 Bambara Automatic Speech Recognition
            **Powered by MALIBA-AI**
            Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
            - 🎙️ **Record** your voice directly
            - 📁 **Upload** an audio file
            - 🎵 **Try** our example audio files
            ## Supported Audio Formats
            WAV, MP3, M4A, FLAC, OGG
            """
        )
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    label="🎤 Record or Upload Audio",
                    type="filepath",
                    sources=["microphone", "upload"]
                )
                transcribe_btn = gr.Button(
                    "🔄 Transcribe Audio",
                    variant="primary",
                    size="lg"
                )
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
            with gr.Column():
                output_text = gr.Textbox(
                    label="📝 Transcribed Text (Bambara)",
                    lines=8,
                    placeholder="Your transcribed Bambara text will appear here...",
                    interactive=False
                )
        # Only show the examples section when example audio actually exists.
        if example_files:
            gr.Markdown("## 🎵 Try These Examples")
            gr.Examples(
                examples=[[f] for f in example_files],
                inputs=[audio_input],
                outputs=output_text,
                fn=transcribe_audio,
                cache_examples=False,
                label="Example Audio Files"
            )
        gr.Markdown(
            """
            ---
            ## ℹ️ About This Model
            - **Model:** [sudoping01/bambara-asr-v2](https://huggingface.co/sudoping01/bambara-asr-v2)
            - **Developer:** MALIBA-AI
            - **Language:** Bambara (bm)
            - **Task:** Automatic Speech Recognition (ASR)
            - **Sample Rate:** 16kHz (automatically resampled)
            ## 🚀 How to Use
            1. **Record Audio:** Click the microphone button and speak in Bambara
            2. **Upload File:** Click the upload button to select an audio file
            3. **Transcribe:** Click the "Transcribe Audio" button
            4. **View Results:** See your transcribed text in Bambara
            ## 📊 Performance Notes
            - Best results with clear speech and minimal background noise
            - Supports various audio formats and durations
            - Optimized for Bambara language patterns and phonetics
            """
        )
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True
        )
        clear_btn.click(
            fn=lambda: (None, ""),
            outputs=[audio_input, output_text]
        )
        # NOTE(review): transcription also fires automatically whenever the
        # audio value changes, so a change followed by a button press runs
        # the model twice on the same clip — confirm this auto-transcribe
        # behavior is intended before removing either trigger.
        audio_input.change(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True
        )
    return demo
def main():
    """Entry point: build the Gradio app and serve it on all interfaces, port 7860."""
    logger.info("Starting Bambara ASR Gradio interface.")
    demo = build_interface()
    # Bind to 0.0.0.0 so the app is reachable inside the Spaces container.
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )
    logger.info("Gradio interface launched successfully.")


if __name__ == "__main__":
    main()