Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| import tempfile | |
| import os | |
| from pathlib import Path | |
| import subprocess | |
| import speech_recognition as sr | |
| from pydub import AudioSegment | |
| import re | |
| import numpy as np | |
| from typing import Dict, Tuple, Optional | |
| import json | |
class AccentDetector:
    """
    Heuristic English-accent classifier for video clips.

    The pipeline downloads a video, extracts a 16 kHz mono WAV track,
    transcribes it with Google Speech Recognition and scores the transcript
    against per-accent keyword and vocabulary lists. Only the transcript
    text is analysed; no acoustic features are inspected by this class.
    """

    # Score given to an accent when the transcript holds none of its markers.
    _BASE_SCORES = {
        'American': 25.0,  # Default higher for American English
        'British': 15.0,
        'Australian': 10.0,
        'Canadian': 12.0,
        'South African': 8.0
    }

    def __init__(self):
        # 'keywords' and 'vocabulary' drive the scoring in
        # analyze_accent_patterns; 'phonetic_markers' is descriptive only and
        # is not read by any method of this class.
        self.accent_patterns = {
            'American': {
                'keywords': ['gonna', 'wanna', 'gotta', 'kinda', 'sorta'],
                'phonetic_markers': ['r-colored vowels', 'rhotic'],
                'vocabulary': ['elevator', 'apartment', 'garbage', 'vacation', 'cookie']
            },
            'British': {
                'keywords': ['brilliant', 'lovely', 'quite', 'rather', 'chap'],
                'phonetic_markers': ['non-rhotic', 'received pronunciation'],
                'vocabulary': ['lift', 'flat', 'rubbish', 'holiday', 'biscuit']
            },
            'Australian': {
                'keywords': ['mate', 'bloody', 'fair dinkum', 'crikey', 'reckon'],
                'phonetic_markers': ['broad vowels', 'rising intonation'],
                'vocabulary': ['arvo', 'brekkie', 'servo', 'bottle-o', 'mozzie']
            },
            'Canadian': {
                'keywords': ['eh', 'about', 'house', 'out', 'sorry'],
                'phonetic_markers': ['canadian raising', 'eh particle'],
                'vocabulary': ['toque', 'hydro', 'washroom', 'parkade', 'chesterfield']
            },
            'South African': {
                'keywords': ['ag', 'man', 'hey', 'lekker', 'braai'],
                'phonetic_markers': ['kit-split', 'dental fricatives'],
                'vocabulary': ['robot', 'bakkie', 'boerewors', 'biltong', 'sosatie']
            }
        }

    @staticmethod
    def _remove_quietly(path: str) -> None:
        """Delete *path* if possible; cleanup is best-effort and never raises."""
        try:
            os.remove(path)
        except OSError:
            # Missing file or permission problem: nothing useful to do here.
            pass

    @staticmethod
    def _audio_path_for(video_path: str) -> str:
        """
        Return the WAV path that sits next to *video_path*.

        Only the final suffix is swapped, so a '.mp4' occurring elsewhere in
        the path is left alone. The result is guaranteed to differ from the
        input so the extractor can never overwrite its own source file.
        """
        source = Path(video_path)
        target = source.with_suffix('.wav')
        if target == source:
            target = source.with_name(source.stem + '_audio.wav')
        return str(target)

    @staticmethod
    def _contains_term(text: str, term: str) -> bool:
        """Return True when *term* occurs in *text* as a whole word or phrase."""
        # Look-arounds instead of \b so hyphenated terms such as 'bottle-o'
        # and multi-word terms such as 'fair dinkum' are handled uniformly.
        pattern = r'(?<!\w)' + re.escape(term) + r'(?!\w)'
        return re.search(pattern, text) is not None

    def download_video(self, url: str) -> str:
        """
        Download video from URL to temporary file.

        Returns the path of a new '.mp4' temporary file; the caller owns it
        and is responsible for deleting it.

        Raises:
            Exception: if the request or the write fails. The partially
                written temporary file is removed before raising.
        """
        temp_path = None
        try:
            # The context manager releases the streamed connection even when
            # the body is not read to the end.
            with requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
                    temp_path = temp_file.name
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            temp_file.write(chunk)
            return temp_path
        except Exception as e:
            if temp_path is not None:
                self._remove_quietly(temp_path)
            raise Exception(f"Failed to download video: {str(e)}") from e

    def extract_audio(self, video_path: str) -> str:
        """
        Extract audio from video file using ffmpeg.

        Writes a 16 kHz, mono, 16-bit PCM WAV file next to *video_path* and
        returns its path. pydub is used as a fallback when the ffmpeg
        executable is missing or exits with a non-zero status.

        Raises:
            Exception: if neither ffmpeg nor pydub can produce the file. Any
                partial output is removed before raising.
        """
        audio_path = None
        try:
            audio_path = self._audio_path_for(video_path)
            cmd = [
                'ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
                '-ar', '16000', '-ac', '1', '-y', audio_path
            ]
            try:
                completed = subprocess.run(cmd, capture_output=True, text=True)
                ffmpeg_ok = completed.returncode == 0
            except FileNotFoundError:
                # No ffmpeg executable on PATH; let the fallback try.
                ffmpeg_ok = False
            if not ffmpeg_ok:
                audio = AudioSegment.from_file(video_path)
                audio = audio.set_frame_rate(16000).set_channels(1)
                audio.export(audio_path, format="wav")
            return audio_path
        except Exception as e:
            if audio_path is not None:
                self._remove_quietly(audio_path)
            raise Exception(f"Failed to extract audio: {str(e)}") from e

    def transcribe_audio(self, audio_path: str) -> str:
        """
        Transcribe audio to text using speech recognition.

        Returns the lower-cased transcript produced by Google Speech
        Recognition with language 'en-US'.

        Raises:
            Exception: if the speech is unintelligible or the recognition
                service cannot be reached.
        """
        try:
            recognizer = sr.Recognizer()
            with sr.AudioFile(audio_path) as source:
                # No adjust_for_ambient_noise() here: on a file source it
                # reads (and discards) the start of the recording, and the
                # threshold it calibrates is not used by record().
                audio_data = recognizer.record(source)
            # Use Google Speech Recognition (free tier)
            text = recognizer.recognize_google(audio_data, language='en-US')
            return text.lower()
        except sr.UnknownValueError as e:
            raise Exception("Could not understand the audio") from e
        except sr.RequestError as e:
            raise Exception(f"Speech recognition error: {str(e)}") from e

    def analyze_accent_patterns(self, text: str) -> Dict[str, float]:
        """
        Analyze text for accent-specific patterns.

        Each keyword found adds 15 points and each vocabulary word adds 10.
        The total is scaled by match density (matches / word count), capped
        at 95 and never allowed to fall below the accent's no-match baseline.
        Accents without any match receive the baseline from
        _calculate_base_score.

        Returns a mapping of accent name to score rounded to one decimal;
        every score is 0.0 when *text* contains no words.
        """
        text = text.lower()  # marker lists are lower-case
        word_count = len(text.split())
        if word_count == 0:
            return {accent: 0.0 for accent in self.accent_patterns}

        scores = {}
        for accent, patterns in self.accent_patterns.items():
            # Whole-word matching: a plain substring test would count 'hey'
            # inside "they", 'man' inside "many" or 'eh' inside "the".
            keyword_hits = sum(
                1 for keyword in patterns['keywords']
                if self._contains_term(text, keyword)
            )
            vocab_hits = sum(
                1 for vocab_word in patterns['vocabulary']
                if self._contains_term(text, vocab_word)
            )
            matches = keyword_hits + vocab_hits
            score = self._calculate_base_score(text, accent)
            if matches > 0:
                raw = keyword_hits * 15.0 + vocab_hits * 10.0
                scaled = min(raw * (matches / word_count) * 100, 95.0)
                # Finding a marker must not rank an accent below the score it
                # would have had with no markers at all (long transcripts
                # dilute the density term).
                score = max(scaled, score)
            scores[accent] = round(score, 1)
        return scores

    def _calculate_base_score(self, text: str, accent: str) -> float:
        """
        Calculate base confidence score for accent detection.

        Starts from the per-accent baseline (10.0 for unknown accents), adds
        20 when British or American spellings of colour/favour/centre appear
        for the matching accent, and caps the result at 40.
        """
        score = self._BASE_SCORES.get(accent, 10.0)
        # Look for spelling patterns
        if accent == 'British' and ('colour' in text or 'favour' in text or 'centre' in text):
            score += 20.0
        elif accent == 'American' and ('color' in text or 'favor' in text or 'center' in text):
            score += 20.0
        return min(score, 40.0)  # Cap base scores

    def classify_accent(self, scores: Dict[str, float]) -> Tuple[str, float, str]:
        """
        Classify the most likely accent and provide explanation.

        Returns (accent name, score, explanation). When *scores* is empty or
        all zero the result is ("Unknown", 0.0, <reason>). Ties go to the
        accent that appears first in *scores*.
        """
        if not scores or all(score == 0 for score in scores.values()):
            return "Unknown", 0.0, "Insufficient accent markers detected"
        accent_name = max(scores, key=scores.get)
        confidence = scores[accent_name]
        explanation = self._generate_explanation(accent_name, confidence, scores)
        return accent_name, confidence, explanation

    def _generate_explanation(self, accent: str, confidence: float, all_scores: Dict[str, float]) -> str:
        """
        Generate explanation for the accent classification.

        The wording depends only on *confidence* (bands at 20, 50 and 75);
        *all_scores* is accepted for interface compatibility and not used.
        """
        if confidence < 20:
            return "Low confidence detection. The speech patterns are not strongly indicative of any specific English accent."
        elif confidence < 50:
            return f"Moderate confidence in {accent} accent based on limited linguistic markers."
        elif confidence < 75:
            return f"Good confidence in {accent} accent. Several characteristic patterns detected."
        else:
            return f"High confidence in {accent} accent with strong linguistic indicators."

    def process_video(self, url: str) -> Dict:
        """
        Main processing pipeline.

        Runs download, audio extraction, transcription and scoring in order,
        reporting progress through Streamlit. Never raises: failures are
        returned as {'success': False, 'error': <message>}. On success the
        dict holds 'transcript', 'accent', 'confidence', 'explanation' and
        'all_scores'. Temporary files are removed in every case.
        """
        # NOTE(review): the emoji prefixes in the progress messages below look
        # mis-encoded (mojibake); kept verbatim, confirm against the original.
        temp_files = []
        try:
            # Step 1: Download video
            st.write("π₯ Downloading video...")
            video_path = self.download_video(url)
            temp_files.append(video_path)
            # Step 2: Extract audio
            st.write("π΅ Extracting audio...")
            audio_path = self.extract_audio(video_path)
            temp_files.append(audio_path)
            # Step 3: Transcribe audio
            st.write("π€ Transcribing speech...")
            transcript = self.transcribe_audio(audio_path)
            # Step 4: Analyze accent
            st.write("π Analyzing accent patterns...")
            accent_scores = self.analyze_accent_patterns(transcript)
            accent, confidence, explanation = self.classify_accent(accent_scores)
            return {
                'success': True,
                'transcript': transcript,
                'accent': accent,
                'confidence': confidence,
                'explanation': explanation,
                'all_scores': accent_scores
            }
        except Exception as e:
            # Top-level boundary: the UI expects an error dict, not a traceback.
            return {
                'success': False,
                'error': str(e)
            }
        finally:
            # Cleanup temporary files
            for temp_file in temp_files:
                self._remove_quietly(temp_file)
def _render_analysis(result):
    """Show the accent, confidence, explanation, transcript and score table."""
    st.success("β Analysis Complete!")
    # Headline metrics side by side
    accent_col, confidence_col = st.columns(2)
    with accent_col:
        st.metric(label="π£οΈ Detected Accent", value=result['accent'])
    with confidence_col:
        st.metric(label="π― Confidence Score", value=f"{result['confidence']}%")
    st.subheader("π Analysis Explanation")
    st.write(result['explanation'])
    st.subheader("π Transcript")
    st.text_area("Transcribed Text:", result['transcript'], height=100)
    st.subheader("π Detailed Accent Scores")
    st.table([
        {"Accent": accent, "Confidence": f"{score}%"}
        for accent, score in result['all_scores'].items()
    ])


def main():
    """
    Build the Streamlit page and run accent detection for the entered URL.

    The page shows an intro, a URL input and an analyze button. Pressing the
    button validates the URL, runs AccentDetector.process_video and renders
    either the results or the error. A validation failure returns early, so
    the footer is not drawn in that case.
    """
    # NOTE(review): the emoji prefixes in the UI strings look mis-encoded
    # (mojibake); they are kept verbatim, confirm against the original file.
    st.set_page_config(
        page_title="English Accent Detector",
        page_icon="π€",
        layout="wide"
    )
    st.title("π€ English Accent Detection Tool")
    st.markdown("### Analyze English accents from video content")
    st.markdown("""
**How it works:**
1. Paste a public video URL (MP4, Loom, etc.)
2. The tool extracts audio and transcribes speech
3. AI analyzes linguistic patterns to detect English accent
4. Get classification, confidence score, and explanation
""")

    # URL entry
    st.subheader("πΉ Video Input")
    video_url = st.text_input(
        "Enter video URL:",
        placeholder="https://example.com/video.mp4 or Loom link",
        help="Must be a direct video link or public Loom video"
    )

    clicked = st.button("π Analyze Accent", type="primary")
    if clicked:
        # Guard clauses: reject missing or non-HTTP(S) input before any work.
        if not video_url:
            st.error("Please enter a video URL")
            return
        if not video_url.startswith(('http://', 'https://')):
            st.error("Please enter a valid URL starting with http:// or https://")
            return

        detector = AccentDetector()
        with st.spinner("Processing video... This may take a few minutes."):
            result = detector.process_video(video_url)

        if not result['success']:
            st.error(f"β Error: {result['error']}")
        else:
            _render_analysis(result)

    # Footer
    st.markdown("---")
    st.markdown("""
**Technical Notes:**
- Supports common video formats (MP4, MOV, AVI)
- Works with public Loom videos and direct video links
- Analyzes vocabulary, pronunciation patterns, and linguistic markers
- Optimized for English language detection
""")
# Script entry point: start the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()