Spaces:
Sleeping
Sleeping
| import speech_recognition as sr | |
| from pydub import AudioSegment | |
| import os | |
| import pandas as pd | |
def mp3_to_wav(mp3_path):
    """Decode an MP3 file and write it as a WAV file beside the original.

    Args:
        mp3_path: Path to the MP3 file to decode.

    Returns:
        Path of the WAV file that was written: ``mp3_path`` with its final
        extension replaced by ``.wav``.
    """
    sound = AudioSegment.from_mp3(mp3_path)
    # Swap only the trailing extension. A plain
    # ``str.replace(".mp3", ".wav")`` also rewrites ".mp3" inside directory
    # names, and leaves the path unchanged for ".MP3" or any other
    # extension, in which case the export would overwrite the input file.
    root, _ = os.path.splitext(mp3_path)
    wav_path = root + ".wav"
    sound.export(wav_path, format="wav")
    return wav_path
def transcribe(audio_path):
    """Transcribe an audio file to text using ``recognize_google``.

    Paths that do not end in ``.wav`` (case-insensitive) are first converted
    with ``mp3_to_wav``; the converted file is removed before returning,
    whether or not transcription succeeded.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The recognized text, or ``""`` when recognition raises.
    """
    import logging

    recognizer = sr.Recognizer()
    needs_conversion = not audio_path.lower().endswith(".wav")
    wav_path = mp3_to_wav(audio_path) if needs_conversion else audio_path
    try:
        with sr.AudioFile(wav_path) as source:
            audio = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio)
        except Exception:
            # Deliberately best-effort: callers get an empty transcript
            # instead of an error, but the failure is logged rather than
            # silently discarded.
            logging.getLogger(__name__).warning(
                "Transcription failed for %s", audio_path, exc_info=True
            )
            return ""
    finally:
        # Runs even if reading the audio fails, so the temporary WAV never
        # lingers. Only a file created by the conversion is deleted, never
        # the caller's own input.
        if wav_path != audio_path and os.path.exists(wav_path):
            os.remove(wav_path)
def word_by_word_table(ref_audio_path, user_audio_path):
    """Compare two recordings word by word, position by position.

    Both files are transcribed with ``transcribe``, split on whitespace and
    paired by index. Words are compared case-insensitively; the shorter
    transcript is padded with ``""`` so every position yields a row.

    Args:
        ref_audio_path: Path to the reference recording.
        user_audio_path: Path to the user's attempt.

    Returns:
        A ``pandas.DataFrame`` with the columns "Reference", "Your Attempt"
        and "Match", one row per word position. The columns are present
        even when both transcripts are empty.
    """
    from itertools import zip_longest

    ref_words = transcribe(ref_audio_path).split()
    user_words = transcribe(user_audio_path).split()

    rows = [
        {
            "Reference": ref_word,
            "Your Attempt": user_word,
            # Visually distinct markers; the previous literals were
            # mis-decoded bytes that both rendered as "β".
            "Match": "✅" if ref_word.lower() == user_word.lower() else "❌",
        }
        for ref_word, user_word in zip_longest(
            ref_words, user_words, fillvalue=""
        )
    ]
    # Naming the columns explicitly keeps the table's shape stable when
    # there are no rows at all.
    return pd.DataFrame(rows, columns=["Reference", "Your Attempt", "Match"])