""" Quickstart Example - Ensemble TTS Annotation This notebook demonstrates how to use the EnsembleAnnotator for Portuguese BR TTS annotation with OPTION A (3-model ensemble). """ # %% Install dependencies (if needed) # !pip install -r ../requirements.txt # %% Imports from ensemble_tts import EnsembleAnnotator import librosa import pandas as pd from pathlib import Path # %% Example 1: Annotate a single audio file print("=" * 60) print("Example 1: Single Audio File Annotation") print("=" * 60) # Create annotator (OPTION A - Balanced mode) annotator = EnsembleAnnotator( mode='balanced', # 3 models: emotion2vec (fine-tuned) + Whisper + SenseVoice device='cpu', # Use 'cuda' if GPU available voting_strategy='weighted', enable_events=True ) # Annotate audio file # result = annotator.annotate('path/to/audio.wav') # Example with dummy audio import numpy as np dummy_audio = np.random.randn(16000) # 1 second of audio result = annotator.annotate(dummy_audio, sample_rate=16000) print("\nResult:") print(f"Emotion: {result['emotion']['label']}") print(f"Confidence: {result['emotion']['confidence']:.2%}") print(f"Agreement: {result['emotion']['agreement']:.2%}") print(f"Events detected: {result['events']['detected']}") # %% Example 2: Annotate multiple files print("\n" + "=" * 60) print("Example 2: Batch Annotation") print("=" * 60) # List of audio files audio_files = [ 'audio1.wav', 'audio2.wav', 'audio3.wav' ] # For this example, use dummy audio dummy_audios = [np.random.randn(16000 * (i + 1)) for i in range(3)] # Annotate batch results = annotator.annotate_batch( dummy_audios, sample_rates=[16000] * 3 ) print(f"\nAnnotated {len(results)} files") for i, result in enumerate(results): print(f" File {i+1}: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})") # %% Example 3: Annotate HuggingFace dataset print("\n" + "=" * 60) print("Example 3: HuggingFace Dataset Annotation") print("=" * 60) from datasets import load_dataset # Load dataset dataset = load_dataset('marcosremar2/orpheus-tts-portuguese-dataset', split='train') # Annotate first 10 samples results = annotator.annotate_dataset( dataset, audio_column='audio', text_column='text', max_samples=10 ) print(f"\nAnnotated {len(results)} samples from dataset") # Get statistics stats = annotator.get_stats(results) print("\nStatistics:") print(f" Emotion distribution: {stats['emotion_distribution']}") print(f" Average confidence: {stats['avg_confidence']:.2%}") print(f" Average agreement: {stats['avg_agreement']:.2%}") print(f" Events detected: {stats['total_events_detected']}") # %% Example 4: Save results to file print("\n" + "=" * 60) print("Example 4: Save Results") print("=" * 60) # Convert to DataFrame df = pd.DataFrame([ { 'sample_id': r.get('sample_id', i), 'text': r.get('text', ''), 'emotion': r['emotion']['label'], 'confidence': r['emotion']['confidence'], 'agreement': r['emotion']['agreement'], 'events': ','.join(r['events']['detected']) } for i, r in enumerate(results) ]) # Save to parquet output_file = '../data/annotated/example_annotations.parquet' df.to_parquet(output_file) print(f"✅ Saved {len(df)} annotations to {output_file}") # %% Example 5: Compare modes (Quick vs Balanced vs Full) print("\n" + "=" * 60) print("Example 5: Compare Modes") print("=" * 60) modes = ['quick', 'balanced', 'full'] test_audio = np.random.randn(16000 * 3) # 3 seconds for mode in modes: print(f"\n{mode.upper()} MODE:") print("-" * 40) annotator_mode = EnsembleAnnotator(mode=mode, device='cpu') import time start = time.time() result = annotator_mode.annotate(test_audio, sample_rate=16000) elapsed = time.time() - start print(f" Emotion: {result['emotion']['label']}") print(f" Confidence: {result['emotion']['confidence']:.2%}") print(f" Agreement: {result['emotion']['agreement']:.2%}") print(f" Time: {elapsed:.2f}s") # %% Example 6: Access individual model predictions print("\n" + "=" * 60) print("Example 6: Individual Model Predictions") print("=" * 60) result = annotator.annotate(dummy_audio, sample_rate=16000) print("\nEnsemble decision:") print(f" Final: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})") print("\nIndividual predictions:") for pred in result['emotion']['predictions']: print(f" {pred['model_name']}: {pred['label']} ({pred['confidence']:.2%}) [weight: {pred['model_weight']:.2f}]") print("\nVoting breakdown:") for emotion, count in result['emotion']['votes'].items(): print(f" {emotion}: {count} vote(s)") # %% Example 7: Use quick annotation function print("\n" + "=" * 60) print("Example 7: Quick Annotation Function") print("=" * 60) from ensemble_tts import annotate_file # Quick annotation (one-liner) # result = annotate_file('audio.wav', mode='balanced', device='cuda') # With dummy audio (for demo) annotator_quick = EnsembleAnnotator(mode='quick', device='cpu') result = annotator_quick.annotate(dummy_audio, sample_rate=16000) print(f"Quick result: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})") print("\n" + "=" * 60) print("✅ All examples completed!") print("=" * 60)