Spaces:

Shago
/

pronunciation_assessment

Sleeping

pronunciation_assessment / utils.py

Upload 5 files

81e781d verified 5 months ago

1.44 kB

	import logging
	import os
	from itertools import zip_longest

	import pandas as pd
	import speech_recognition as sr
	from pydub import AudioSegment

	def mp3_to_wav(mp3_path):
	    """Convert an MP3 file to WAV and return the path of the new file.

	    The WAV file is written next to the source file, with the source's
	    final extension swapped for ``.wav``.

	    Args:
	        mp3_path: Path of the MP3 file to convert.

	    Returns:
	        Path of the exported WAV file.
	    """
	    # Swap only the real file extension. A plain ``str.replace`` would also
	    # rewrite ".mp3" inside directory names, and would leave "FILE.MP3"
	    # unchanged, so the export would overwrite the source file itself.
	    base, _ext = os.path.splitext(mp3_path)
	    wav_path = base + ".wav"
	    sound = AudioSegment.from_mp3(mp3_path)
	    sound.export(wav_path, format="wav")
	    return wav_path

	def transcribe(audio_path):
	    """Transcribe an audio file to text using ``recognize_google``.

	    Input whose name does not end in ``.wav`` (case-insensitive) is first
	    converted with ``mp3_to_wav``; the converted file is deleted again
	    before this function returns or raises.

	    Args:
	        audio_path: Path of the audio file to transcribe.

	    Returns:
	        The recognized text, or ``""`` when recognition fails.
	    """
	    recognizer = sr.Recognizer()
	    wav_path = audio_path
	    if not audio_path.lower().endswith(".wav"):
	        wav_path = mp3_to_wav(audio_path)
	    try:
	        with sr.AudioFile(wav_path) as source:
	            audio = recognizer.record(source)
	        try:
	            return recognizer.recognize_google(audio)
	        except sr.UnknownValueError:
	            # The recognizer could not make out any speech: an expected
	            # outcome, reported to the caller as an empty transcript.
	            return ""
	        except Exception:
	            # Stay best-effort (callers expect a string), but leave a trace
	            # instead of hiding the failure completely.
	            logging.getLogger(__name__).warning(
	                "Transcription of %s failed", audio_path, exc_info=True
	            )
	            return ""
	    finally:
	        # Only delete a file this call created, never the caller's input.
	        # Running in ``finally`` avoids leaking the converted file when
	        # reading the audio raises.
	        if wav_path != audio_path:
	            try:
	                os.remove(wav_path)
	            except FileNotFoundError:
	                pass

	def word_by_word_table(ref_audio_path, user_audio_path):
	    """Build a position-by-position comparison of two transcribed recordings.

	    Both files are transcribed with ``transcribe`` and split on whitespace.
	    Words are paired by index; the shorter transcript is padded with ``""``.

	    Args:
	        ref_audio_path: Path of the reference recording.
	        user_audio_path: Path of the user's recording.

	    Returns:
	        A ``pandas.DataFrame`` with columns ``Reference``, ``Your Attempt``
	        and ``Match`` ("✅" when the two words are equal ignoring case,
	        otherwise "❌"). The columns are present even when both transcripts
	        are empty.
	    """
	    ref_words = transcribe(ref_audio_path).split()
	    user_words = transcribe(user_audio_path).split()
	    rows = [
	        {
	            "Reference": ref_word,
	            "Your Attempt": user_word,
	            "Match": "✅" if ref_word.lower() == user_word.lower() else "❌",
	        }
	        for ref_word, user_word in zip_longest(
	            ref_words, user_words, fillvalue=""
	        )
	    ]
	    # Explicit columns keep the schema stable when there are no rows at all.
	    return pd.DataFrame(rows, columns=["Reference", "Your Attempt", "Match"])