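"""Streamlit app for the REM Waste English accent detection challenge.

Pipeline: fetch a video (URL or upload) -> extract and normalize audio with pydub ->
transcribe with Google Speech Recognition -> score the transcript against
per-accent keyword/vocabulary/phrase lists and report a confidence value.
"""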
import streamlit as st
import requests
import tempfile
import os
import subprocess
import speech_recognition as sr
from pydub import AudioSegment
import re
from typing import Dict, Tuple
import time

# Configure Streamlit page
st.set_page_config(
    page_title="English Accent Detector | REM Waste",
    page_icon="🎤",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Custom CSS for better styling
st.markdown("""
<style>
    .main > div {
        padding-top: 2rem;
    }
    .stButton > button {
        width: 100%;
        border-radius: 10px;
        border: none;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        font-weight: bold;
        padding: 0.75rem;
    }
    .metric-container {
        background: #f0f2f6;
        padding: 1rem;
        border-radius: 10px;
        text-align: center;
    }
</style>
""", unsafe_allow_html=True)


class AccentDetector:
    """Streamlined accent detection for English speech analysis"""

    def __init__(self):
        self.accent_patterns = {
            'American': {
                'keywords': ['gonna', 'wanna', 'gotta', 'kinda', 'sorta', 'yeah', 'awesome', 'dude'],
                'vocabulary': ['elevator', 'apartment', 'garbage', 'vacation', 'cookie', 'candy', 'mom', 'color'],
                'phrases': ['you know', 'like totally', 'for sure', 'right now']
            },
            'British': {
                'keywords': ['brilliant', 'lovely', 'quite', 'rather', 'chap', 'bloody', 'bloke', 'cheers'],
                'vocabulary': ['lift', 'flat', 'rubbish', 'holiday', 'biscuit', 'queue', 'mum', 'colour'],
                'phrases': ['i say', 'good heavens', 'how do you do', 'spot on']
            },
            'Australian': {
                'keywords': ['mate', 'bloody', 'crikey', 'reckon', 'fair dinkum', 'bonkers', 'ripper'],
                'vocabulary': ['arvo', 'brekkie', 'servo', 'bottle-o', 'mozzie', 'barbie', 'ute'],
                'phrases': ['no worries', 'good on ya', 'she\'ll be right', 'too right']
            },
            'Canadian': {
                'keywords': ['eh', 'about', 'house', 'out', 'sorry', 'hoser', 'beauty'],
                'vocabulary': ['toque', 'hydro', 'washroom', 'parkade', 'chesterfield', 'serviette'],
                'phrases': ['you bet', 'take off', 'give\'r', 'double double']
            },
            'South African': {
                'keywords': ['ag', 'man', 'hey', 'lekker', 'eish', 'shame', 'howzit'],
                'vocabulary': ['robot', 'bakkie', 'boerewors', 'biltong', 'braai', 'veld'],
                'phrases': ['just now', 'now now', 'is it', 'sharp sharp']
            }
        }
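
    # Note: these markers are purely lexical. analyze_patterns() below does plain
    # substring matching against the transcript text, so the classifier reflects
    # vocabulary and phrasing choices rather than actual pronunciation.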

    def download_video(self, url: str) -> str:
        """Download video/audio from a URL, with Loom and YouTube support"""
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }

            # YouTube support (including Shorts)
            if 'youtube.com' in url or 'youtu.be' in url:
                try:
                    import yt_dlp
                except ImportError:
                    raise Exception("yt-dlp is required for YouTube downloads. Please install with 'pip install yt-dlp'.")

                # Use yt-dlp to download the best audio to a temp directory; let yt-dlp pick the filename
                tmpdir = tempfile.mkdtemp()
                ydl_opts = {
                    'format': 'bestaudio[ext=m4a]/bestaudio/best',
                    'outtmpl': f'{tmpdir}/%(id)s.%(ext)s',
                    'quiet': True,
                    'noplaylist': True,
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'wav',
                        'preferredquality': '192',
                    }],
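                    # NOTE: hardcoded Homebrew path (macOS). On Linux hosts such as a
                    # Hugging Face Space, ffmpeg is usually on PATH and this entry may
                    # need to be changed or removed for the yt-dlp step to work.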
                    'ffmpeg_location': '/opt/homebrew/bin/ffmpeg',
                    'overwrites': True,
                }
                try:
                    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                        info = ydl.extract_info(url, download=True)
                    # Find the resulting .wav file
                    for f in os.listdir(tmpdir):
                        if f.endswith('.wav'):
                            # Move the file to a permanent temp location so it persists after this function
                            final_temp = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
                            final_temp.close()
                            with open(os.path.join(tmpdir, f), 'rb') as src, open(final_temp.name, 'wb') as dst:
                                dst.write(src.read())
                            return final_temp.name
                    raise Exception("yt-dlp did not produce a valid audio file. Try another video or update yt-dlp/ffmpeg.")
                except Exception as e:
                    raise Exception(f"yt-dlp failed: {str(e)}. Try updating yt-dlp and ffmpeg.")

            # Loom support (fallback: try to extract the video stream URL from the page HTML)
            if 'loom.com' in url:
                resp = requests.get(url, headers=headers, timeout=30)
                if resp.status_code != 200:
                    raise Exception("Failed to fetch Loom page")
                html = resp.text
                match = re.search(r'src="([^"]+\.mp4)"', html)
                if not match:
                    match = re.search(r'https://cdn\.loom\.com/sessions/[^"\s]+\.mp4', html)
                if not match:
                    raise Exception("Could not extract Loom video stream URL from page HTML")
                # The first pattern captures a group; the fallback pattern matches the URL itself
                video_url = match.group(1) if match.lastindex else match.group(0)
                url = video_url

            # Download the video (Loom or direct link)
            response = requests.get(url, headers=headers, stream=True, timeout=60)
            response.raise_for_status()

            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        temp_file.write(chunk)
                return temp_file.name

        except Exception as e:
            raise Exception(f"Download failed: {str(e)}")

    def extract_audio_simple(self, video_path: str) -> str:
        """Robust audio extraction: handles mp3, wav, mp4, etc."""
        try:
            ext = os.path.splitext(video_path)[1].lower()
            audio_path = video_path.rsplit('.', 1)[0] + '.wav'

            # If already wav, load with pydub directly
            if ext == '.wav':
                audio = AudioSegment.from_wav(video_path)
            else:
                audio = AudioSegment.from_file(video_path)

            audio = audio.set_frame_rate(16000).set_channels(1)
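            # pydub durations are in milliseconds: keep only the first 120,000 ms
            # (2 minutes) so transcription stays within the limit quoted in the UI footer.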
            if len(audio) > 120000:
                audio = audio[:120000]

            audio.export(audio_path, format="wav")
            return audio_path

        except Exception as e:
            raise Exception(f"Audio extraction failed: {str(e)}")

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe with error handling"""
        try:
            r = sr.Recognizer()
            r.energy_threshold = 300
            r.dynamic_energy_threshold = True

            with sr.AudioFile(audio_path) as source:
                r.adjust_for_ambient_noise(source, duration=0.5)
                audio_data = r.record(source)

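            # recognize_google() calls Google's free Web Speech API, so this step
            # needs outbound network access and can occasionally fail or throttle.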
            # Try Google Speech Recognition
            text = r.recognize_google(audio_data, language='en-US')
            return text.lower()

        except sr.UnknownValueError:
            raise Exception("Could not understand the audio clearly")
        except sr.RequestError as e:
            raise Exception(f"Speech recognition service error: {str(e)}")
        except Exception as e:
            raise Exception(f"Transcription failed: {str(e)}")

    def analyze_patterns(self, text: str) -> Dict[str, float]:
        """Enhanced pattern analysis"""
        scores = {}
        words = text.split()
        word_count = max(len(words), 1)

        for accent, patterns in self.accent_patterns.items():
            score = 0.0
            total_matches = 0

            # Keywords (high weight)
            for keyword in patterns['keywords']:
                if keyword in text:
                    score += 20.0
                    total_matches += 1

            # Vocabulary (medium weight)
            for vocab in patterns['vocabulary']:
                if vocab in text:
                    score += 15.0
                    total_matches += 1

            # Phrases (high weight)
            for phrase in patterns['phrases']:
                if phrase in text:
                    score += 25.0
                    total_matches += 1

            # Normalize and add base confidence
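            # Scale the raw score by match density (matches per transcript word),
            # capped at 95 so a few hits in a short clip don't read as certainty.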
            if total_matches > 0:
                score = min(score * (total_matches / word_count) * 50, 95.0)
            else:
                score = self._get_base_score(text, accent)

            scores[accent] = round(max(score, 5.0), 1)

        return scores

    def _get_base_score(self, text: str, accent: str) -> float:
        """Base scoring for general patterns"""
        base_scores = {
            'American': 30.0,
            'British': 20.0,
            'Australian': 15.0,
            'Canadian': 18.0,
            'South African': 12.0
        }
        score = base_scores.get(accent, 15.0)

        # Spelling adjustments
        if accent == 'British':
            if any(word in text for word in ['colour', 'favour', 'centre', 'theatre']):
                score += 25.0
        elif accent == 'American':
            if any(word in text for word in ['color', 'favor', 'center', 'theater']):
                score += 25.0

        return min(score, 45.0)

    def classify_accent(self, scores: Dict[str, float]) -> Tuple[str, float, str]:
        """Classify and explain results"""
        if not scores:
            return "Unknown", 0.0, "No speech detected"

        # Get top result
        top_accent = max(scores.items(), key=lambda x: x[1])
        accent, confidence = top_accent

        # Generate explanation
        if confidence < 25:
            explanation = "Low confidence - speech patterns are not strongly distinctive"
        elif confidence < 50:
            explanation = f"Moderate confidence in {accent} accent based on some linguistic markers"
        elif confidence < 75:
            explanation = f"Good confidence in {accent} accent with clear characteristic patterns"
        else:
            explanation = f"High confidence in {accent} accent with strong linguistic evidence"

        return accent, confidence, explanation


# Initialize detector
def get_detector():
    return AccentDetector()
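
# NOTE: AccentDetector is cheap to construct, so get_detector() isn't cached; if it
# ever grows expensive state, it could be wrapped with @st.cache_resource
# (Streamlit >= 1.18) so a single instance is reused across reruns.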


def main():
    # Header
    st.title("🎤 English Accent Detection Tool")
    st.markdown("**AI-powered accent analysis for English speech | Built for REM Waste**")

    # Description
    with st.expander("ℹ️ How it works", expanded=False):
        st.markdown("""
        1. **Input**: Paste a public video URL (MP4, Loom, YouTube, etc.)
        2. **Processing**: Extract audio → Transcribe speech → Analyze patterns
        3. **Output**: Accent classification + confidence score + explanation

        **Supported Accents**: American, British, Australian, Canadian, South African
        """)

    # Input section
    st.subheader("📹 Video Input")

    # File upload option
    uploaded_file = st.file_uploader(
        "Or upload a local video/audio file (MP4, WAV, MP3, etc.):",
        type=["mp4", "mov", "avi", "wav", "mp3", "m4a", "aac", "ogg"],
        help="Upload a file directly if you can't use a public URL."
    )

    # Sample URLs for testing
    with st.expander("🧪 Test with sample videos"):
        st.markdown("""
        **Sample URLs for testing:**

        - `https://sample-videos.com/zip/10/mp4/SampleVideo_1280x720_1mb.mp4`
        - `https://www.learningcontainer.com/wp-content/uploads/2020/05/sample-mp4-file.mp4`
        - Or any public Loom/YouTube video URL
        """)

    video_url = st.text_input(
        "Enter video URL:",
        placeholder="https://example.com/video.mp4",
        help="Must be a publicly accessible video URL"
    )

    # Process button
    if st.button("🔍 Analyze Accent", type="primary"):
        if not video_url.strip() and not uploaded_file:
            st.error("⚠️ Please enter a video URL or upload a file")
            return

        if video_url and not video_url.startswith(('http://', 'https://')):
            st.error("⚠️ Please enter a valid URL starting with http:// or https://")
            return

        detector = get_detector()
        temp_files = []

        try:
            progress_bar = st.progress(0)
            status_text = st.empty()

            # Step 1: Get the video (upload or URL download)
            if uploaded_file:
                # Save uploaded file to a temp file
                suffix = os.path.splitext(uploaded_file.name)[1]
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
                    f.write(uploaded_file.read())
                    video_path = f.name
                temp_files.append(video_path)
            else:
                status_text.text("📥 Downloading video...")
                progress_bar.progress(20)
                video_path = detector.download_video(video_url)
                temp_files.append(video_path)

            # Step 2: Extract audio
            status_text.text("🎵 Extracting audio...")
            progress_bar.progress(50)
            audio_path = detector.extract_audio_simple(video_path)
            temp_files.append(audio_path)

            # Step 3: Transcribe
            status_text.text("🎤 Transcribing speech...")
            progress_bar.progress(75)
            transcript = detector.transcribe_audio(audio_path)

            # Step 4: Analyze
            status_text.text("🔍 Analyzing accent patterns...")
            progress_bar.progress(90)
            scores = detector.analyze_patterns(transcript)
            accent, confidence, explanation = detector.classify_accent(scores)

            # Complete
            progress_bar.progress(100)
            status_text.text("✅ Analysis complete!")
            time.sleep(0.5)

            # Clear progress indicators
            progress_bar.empty()
            status_text.empty()

            # Display results
            st.success("🎉 **Analysis Complete!**")

            # Main metrics
            col1, col2, col3 = st.columns(3)

            with col1:
                st.markdown(f"""
                <div class="metric-container">
                    <h3>🗣️ Detected Accent</h3>
                    <h2 style="color: #667eea;">{accent}</h2>
                </div>
                """, unsafe_allow_html=True)

            with col2:
                st.markdown(f"""
                <div class="metric-container">
                    <h3>🎯 Confidence</h3>
                    <h2 style="color: #764ba2;">{confidence}%</h2>
                </div>
                """, unsafe_allow_html=True)

            with col3:
                # Get transcript length for quality indicator
                word_count = len(transcript.split())
                quality = "High" if word_count > 50 else "Medium" if word_count > 20 else "Low"
                st.markdown(f"""
                <div class="metric-container">
                    <h3>📊 Data Quality</h3>
                    <h2 style="color: #28a745;">{quality}</h2>
                    <small>{word_count} words</small>
                </div>
                """, unsafe_allow_html=True)

            st.markdown("---")

            # Explanation
            st.subheader("📋 Analysis Summary")
            st.info(explanation)

            # Transcript
            st.subheader("📝 Transcribed Speech")
            st.text_area(
                "Full transcript:",
                transcript,
                height=120,
                help="This is what the AI heard from the video"
            )

            # Detailed scores
            st.subheader("📊 All Accent Scores")

            # Create a more visual representation
            for accent_name, score in sorted(scores.items(), key=lambda x: x[1], reverse=True):
                # Create progress bar for each accent
                col_name, col_bar, col_score = st.columns([2, 6, 1])
                with col_name:
                    st.write(f"**{accent_name}**")
                with col_bar:
                    st.progress(score / 100)
                with col_score:
                    st.write(f"{score}%")

            # Additional insights
            if confidence > 60:
                st.success(f"🎯 **Strong Detection**: The {accent} accent markers are clearly present in the speech.")
            elif confidence > 40:
                st.warning(f"⚠️ **Moderate Detection**: Some {accent} patterns detected, but results may vary with longer audio.")
            else:
                st.info("💡 **Tip**: Longer speech samples (30+ seconds) generally provide more accurate results.")

        except Exception as e:
            st.error(f"❌ **Processing Error**: {str(e)}")
            st.info("""
            **Troubleshooting Tips:**

            - Ensure the video URL is publicly accessible
            - Try a different video format or a shorter video
            - Make sure the video contains clear English speech
            - Check your internet connection
            """)

        finally:
            # Cleanup temp files
            for temp_file in temp_files:
                try:
                    if os.path.exists(temp_file):
                        os.remove(temp_file)
                except Exception:
                    pass

    # Footer information
    st.markdown("---")

    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
        **🔧 Technical Details**

        - Audio processing: Up to 2 minutes
        - Speech recognition: Google API
        - Analysis: Pattern matching + linguistics
        - Processing time: ~30-90 seconds
        """)

    with col2:
        st.markdown("""
        **📋 Requirements**

        - Public video URLs only
        - Clear English speech preferred
        - Supports MP4, MOV, AVI formats
        - Works with Loom, YouTube, direct links
        """)

    # API information
    with st.expander("🔌 API Usage"):
        st.code("""
        # Python usage example (the same pipeline the app runs)
        from accent_detector import AccentDetector

        detector = AccentDetector()
        video_path = detector.download_video("https://your-video.com/file.mp4")
        audio_path = detector.extract_audio_simple(video_path)
        transcript = detector.transcribe_audio(audio_path)
        scores = detector.analyze_patterns(transcript)
        accent, confidence, explanation = detector.classify_accent(scores)

        print(f"Accent: {accent}")
        print(f"Confidence: {confidence}%")
        """, language="python")

    # About section
    with st.expander("ℹ️ About This Tool"):
        st.markdown("""
        **Built for REM Waste Interview Challenge**

        This accent detection tool analyzes English speech patterns to classify regional accents.
        It's designed for hiring automation systems that need to evaluate spoken English proficiency.

        **Algorithm Overview:**

        - Extracts audio from video files
        - Transcribes speech using Google Speech Recognition
        - Analyzes linguistic patterns and vocabulary markers in the transcript
        - Provides confidence scores based on pattern strength

        **Accuracy Notes:**

        - Best results with 30+ seconds of clear speech
        - Confidence scores reflect pattern strength, not absolute accuracy
        - Designed for screening purposes, not definitive classification

        **Privacy & Ethics:**

        - No audio/video data is stored permanently
        - Temporary files are automatically deleted
        - Tool is intended for voluntary language assessment only
        """)


if __name__ == "__main__":
    main()