"""
Quickstart Example - Ensemble TTS Annotation

This example demonstrates how to use the EnsembleAnnotator for Brazilian
Portuguese (pt-BR) TTS annotation with OPTION A (3-model ensemble).
"""

import time
from pathlib import Path

import numpy as np
import pandas as pd

from ensemble_tts import EnsembleAnnotator
print("=" * 60) |
|
|
print("Example 1: Single Audio File Annotation") |
|
|
print("=" * 60) |
|
|
|
|
|
|
|
|
annotator = EnsembleAnnotator( |
|
|
mode='balanced', |
|
|
device='cpu', |
|
|
voting_strategy='weighted', |
|
|
enable_events=True |
|
|
) |

# One second of dummy audio at 16 kHz; substitute real speech in practice.
dummy_audio = np.random.randn(16000)
result = annotator.annotate(dummy_audio, sample_rate=16000)

print("\nResult:")
print(f"Emotion: {result['emotion']['label']}")
print(f"Confidence: {result['emotion']['confidence']:.2%}")
print(f"Agreement: {result['emotion']['agreement']:.2%}")
print(f"Events detected: {result['events']['detected']}")

print("\n" + "=" * 60)
print("Example 2: Batch Annotation")
print("=" * 60)

# In a real run you would point at audio files on disk:
audio_files = [
    'audio1.wav',
    'audio2.wav',
    'audio3.wav'
]

# For a self-contained example, synthesize dummy clips of 1, 2, and 3 seconds
# at 16 kHz instead of loading `audio_files`.
dummy_audios = [np.random.randn(16000 * (i + 1)) for i in range(3)]

# Annotate all clips in one call; sample_rates must match dummy_audios in length.
results = annotator.annotate_batch(
    dummy_audios,
    sample_rates=[16000] * 3
)

print(f"\nAnnotated {len(results)} files")
for i, result in enumerate(results):
    print(f"  File {i+1}: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})")

print("\n" + "=" * 60)
print("Example 3: HuggingFace Dataset Annotation")
print("=" * 60)

from datasets import load_dataset

# Load a public pt-BR TTS dataset from the Hugging Face Hub.
dataset = load_dataset('marcosremar2/orpheus-tts-portuguese-dataset', split='train')

# Annotate the first 10 samples; the column arguments name the dataset fields.
results = annotator.annotate_dataset(
    dataset,
    audio_column='audio',
    text_column='text',
    max_samples=10
)

print(f"\nAnnotated {len(results)} samples from dataset")

# Aggregate summary statistics over the annotation results.
stats = annotator.get_stats(results)
print("\nStatistics:")
print(f"  Emotion distribution: {stats['emotion_distribution']}")
print(f"  Average confidence: {stats['avg_confidence']:.2%}")
print(f"  Average agreement: {stats['avg_agreement']:.2%}")
print(f"  Events detected: {stats['total_events_detected']}")

print("\n" + "=" * 60)
print("Example 4: Save Results")
print("=" * 60)

# Flatten the nested annotation dicts into one row per sample.
df = pd.DataFrame([
    {
        'sample_id': r.get('sample_id', i),
        'text': r.get('text', ''),
        'emotion': r['emotion']['label'],
        'confidence': r['emotion']['confidence'],
        'agreement': r['emotion']['agreement'],
        'events': ','.join(r['events']['detected'])
    }
    for i, r in enumerate(results)
])

output_file = Path('../data/annotated/example_annotations.parquet')
output_file.parent.mkdir(parents=True, exist_ok=True)  # avoid FileNotFoundError on first run
df.to_parquet(output_file)
print(f"✅ Saved {len(df)} annotations to {output_file}")

print("\n" + "=" * 60)
print("Example 5: Compare Modes")
print("=" * 60)

# Compare the speed/accuracy trade-off of the three modes on one 3-second clip.
modes = ['quick', 'balanced', 'full']
test_audio = np.random.randn(16000 * 3)

for mode in modes:
    print(f"\n{mode.upper()} MODE:")
    print("-" * 40)

    annotator_mode = EnsembleAnnotator(mode=mode, device='cpu')

    start = time.time()
    result = annotator_mode.annotate(test_audio, sample_rate=16000)
    elapsed = time.time() - start

    print(f"  Emotion: {result['emotion']['label']}")
    print(f"  Confidence: {result['emotion']['confidence']:.2%}")
    print(f"  Agreement: {result['emotion']['agreement']:.2%}")
    print(f"  Time: {elapsed:.2f}s")

print("\n" + "=" * 60)
print("Example 6: Individual Model Predictions")
print("=" * 60)

result = annotator.annotate(dummy_audio, sample_rate=16000)

print("\nEnsemble decision:")
print(f"  Final: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})")

# Each model's vote, along with the weight it carried in the weighted ensemble.
print("\nIndividual predictions:")
for pred in result['emotion']['predictions']:
    print(f"  {pred['model_name']}: {pred['label']} ({pred['confidence']:.2%}) [weight: {pred['model_weight']:.2f}]")

print("\nVoting breakdown:")
for emotion, count in result['emotion']['votes'].items():
    print(f"  {emotion}: {count} vote(s)")

print("\n" + "=" * 60)
print("Example 7: Quick Annotation Function")
print("=" * 60)

from ensemble_tts import annotate_file
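
# `annotate_file` is a one-call convenience for audio files on disk. Only
# in-memory dummy audio is available here, so the call below is illustrative
# and its exact signature is assumed, not verified:
# result = annotate_file('path/to/audio.wav', mode='quick')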

# Equivalent flow with an explicit quick-mode annotator:
annotator_quick = EnsembleAnnotator(mode='quick', device='cpu')
result = annotator_quick.annotate(dummy_audio, sample_rate=16000)

print(f"Quick result: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})")

print("\n" + "=" * 60)
print("✅ All examples completed!")
print("=" * 60)