Spaces: Bambara ASR demo (Hugging Face Space — status at capture time: Runtime error)
| import os | |
| import spaces | |
| import torch | |
| import torchaudio | |
| import gradio as gr | |
| import logging | |
| from whosper import WhosperTranscriber | |
# Configure root logging once at import time so all module loggers emit INFO+.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the best available inference device: CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
    logger.info("Using CUDA for inference.")
elif torch.backends.mps.is_available():
    device = "mps"
    logger.info("Using MPS for inference.")
else:
    device = "cpu"
    logger.info("Using CPU for inference.")

# NOTE(review): `device` is computed above but never passed to
# WhosperTranscriber — presumably the transcriber picks its own device
# internally; confirm against the whosper API.
model_id = "sudoping01/bambara-asr-v2"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info(f"Transcriber initialized with model: {model_id}")
def resample_audio(audio_path, target_sample_rate=16000):
    """
    Load an audio file and resample it to the target sampling rate.

    Args:
        audio_path (str): Path to the audio file.
        target_sample_rate (int): Desired sample rate in Hz (default 16000,
            the rate the ASR model expects).

    Returns:
        tuple: ``(waveform, target_sample_rate)`` where ``waveform`` is a
        torch.Tensor, resampled only when the file's native rate differs.

    Raises:
        Exception: Any error from loading or resampling is logged and
        re-raised to the caller.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)
        # Resampling is skipped when the file is already at the target rate.
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate,
            )
            waveform = resampler(waveform)
        return waveform, target_sample_rate
    except Exception as e:
        logger.error(f"Error resampling audio: {e}")
        # Bare `raise` re-raises the active exception with its original
        # traceback intact (`raise e` is redundant and less idiomatic).
        raise
def transcribe_audio(audio_file):
    """
    Transcribe a Bambara audio clip to text using the module-level transcriber.

    Args:
        audio_file: Filesystem path to the audio clip, or None when the user
            has not supplied anything.

    Returns:
        str: The transcribed Bambara text on success, otherwise a
        user-facing message describing the problem.
    """
    # Guard clause: nothing to do without an input file.
    if audio_file is None:
        return "Please provide an audio file for transcription."
    try:
        logger.info(f"Transcribing audio file: {audio_file}")
        transcription = transcriber.transcribe_audio(audio_file)
        logger.info("Transcription successful.")
        # Fall back to an empty string when no "text" field is present.
        return transcription.get("text", "")
    except Exception as e:
        logger.error(f"Transcription failed: {e}")
        return f"Error during transcription: {str(e)}"
def get_example_files(directory="./examples"):
    """
    Return up to five audio files from the examples directory.

    Args:
        directory (str): The directory to search for audio files.

    Returns:
        list: Absolute paths of at most 5 audio files, sorted by filename.
        Empty when the directory is missing or unreadable.
    """
    if not os.path.exists(directory):
        logger.warning(f"Examples directory {directory} not found.")
        return []
    audio_extensions = ('.wav', '.mp3', '.m4a', '.flac', '.ogg')
    audio_files = []
    try:
        # Sort for a deterministic listing: os.listdir order is arbitrary
        # and platform-dependent, which would make the [:5] selection of
        # examples change between runs/hosts.
        for file in sorted(os.listdir(directory)):
            # str.endswith accepts a tuple — one call covers all extensions.
            if file.lower().endswith(audio_extensions):
                audio_files.append(os.path.abspath(os.path.join(directory, file)))
        logger.info(f"Found {len(audio_files)} example audio files.")
        return audio_files[:5]
    except Exception as e:
        logger.error(f"Error reading examples directory: {e}")
        return []
def build_interface():
    """
    Build the Gradio Blocks interface for Bambara speech recognition.

    Returns:
        gr.Blocks: The assembled (unlaunched) Gradio app.
    """
    example_files = get_example_files()
    with gr.Blocks(title="Bambara Speech Recognition") as demo:
        gr.Markdown(
            """
            # 🎤 Bambara Automatic Speech Recognition
            **Powered by MALIBA-AI**
            Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
            - 🎙️ **Record** your voice directly
            - 📁 **Upload** an audio file
            - 🎵 **Try** our example audio files
            ## Supported Audio Formats
            WAV, MP3, M4A, FLAC, OGG
            """
        )
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    label="🎤 Record or Upload Audio",
                    type="filepath",
                    sources=["microphone", "upload"]
                )
                transcribe_btn = gr.Button(
                    "🔄 Transcribe Audio",
                    variant="primary",
                    size="lg"
                )
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
            with gr.Column():
                output_text = gr.Textbox(
                    label="📝 Transcribed Text (Bambara)",
                    lines=8,
                    placeholder="Your transcribed Bambara text will appear here...",
                    interactive=False
                )
        # Only show the examples section when example audio actually exists.
        if example_files:
            gr.Markdown("## 🎵 Try These Examples")
            gr.Examples(
                examples=[[f] for f in example_files],
                inputs=[audio_input],
                outputs=output_text,
                fn=transcribe_audio,
                cache_examples=False,
                label="Example Audio Files"
            )
        gr.Markdown(
            """
            ---
            ## ℹ️ About This Model
            - **Model:** [sudoping01/bambara-asr-v2](https://huggingface.co/sudoping01/bambara-asr-v2)
            - **Developer:** MALIBA-AI
            - **Language:** Bambara (bm)
            - **Task:** Automatic Speech Recognition (ASR)
            - **Sample Rate:** 16kHz (automatically resampled)
            ## 🚀 How to Use
            1. **Record Audio:** Click the microphone button and speak in Bambara
            2. **Upload File:** Click the upload button to select an audio file
            3. **Transcribe:** Click the "Transcribe Audio" button
            4. **View Results:** See your transcribed text in Bambara
            ## 📊 Performance Notes
            - Best results with clear speech and minimal background noise
            - Supports various audio formats and durations
            - Optimized for Bambara language patterns and phonetics
            """
        )
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True
        )
        clear_btn.click(
            fn=lambda: (None, ""),
            outputs=[audio_input, output_text]
        )
        # NOTE(review): transcription also fires automatically whenever the
        # audio value changes, so a change followed by a button press runs
        # the model twice on the same clip — confirm this auto-transcribe
        # behavior is intended before removing either trigger.
        audio_input.change(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True
        )
    return demo
def main():
    """Entry point: build the Gradio app and serve it on all interfaces, port 7860."""
    logger.info("Starting Bambara ASR Gradio interface.")
    demo = build_interface()
    # Bind to 0.0.0.0 so the app is reachable inside the Spaces container.
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )
    logger.info("Gradio interface launched successfully.")


if __name__ == "__main__":
    main()