File size: 5,310 Bytes
fe63a26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""
Quickstart Example - Ensemble TTS Annotation

This notebook demonstrates how to use the EnsembleAnnotator for Portuguese BR
TTS annotation with OPTION A (3-model ensemble).
"""

# %% Install dependencies (if needed)
# !pip install -r ../requirements.txt

# %% Imports
from ensemble_tts import EnsembleAnnotator
import librosa
import pandas as pd
from pathlib import Path

# %% Example 1: Annotate a single audio file

banner = "=" * 60
print(banner)
print("Example 1: Single Audio File Annotation")
print(banner)

# Build the ensemble annotator (OPTION A - Balanced mode).
# Balanced mode combines 3 models: emotion2vec (fine-tuned) + Whisper + SenseVoice.
annotator = EnsembleAnnotator(
    mode='balanced',
    device='cpu',               # switch to 'cuda' when a GPU is available
    voting_strategy='weighted',
    enable_events=True,
)

# Real usage points at a file on disk:
# result = annotator.annotate('path/to/audio.wav')

# For this demo, feed one second of random noise instead of a real recording.
import numpy as np
dummy_audio = np.random.randn(16000)
result = annotator.annotate(dummy_audio, sample_rate=16000)

print("\nResult:")
print(f"Emotion: {result['emotion']['label']}")
print(f"Confidence: {result['emotion']['confidence']:.2%}")
print(f"Agreement: {result['emotion']['agreement']:.2%}")
print(f"Events detected: {result['events']['detected']}")

# %% Example 2: Annotate multiple files

print("\n" + "=" * 60)
print("Example 2: Batch Annotation")
print("=" * 60)

# Paths you would pass in a real run.
audio_files = [
    'audio1.wav',
    'audio2.wav',
    'audio3.wav'
]

# Demo stand-ins: three noise clips of 1, 2 and 3 seconds.
dummy_audios = [np.random.randn(16000 * n) for n in range(1, 4)]

# Run the whole batch through the annotator in one call.
results = annotator.annotate_batch(
    dummy_audios,
    sample_rates=[16000] * 3,
)

print(f"\nAnnotated {len(results)} files")
for idx, item in enumerate(results, start=1):
    emo = item['emotion']
    print(f"  File {idx}: {emo['label']} ({emo['confidence']:.2%})")

# %% Example 3: Annotate HuggingFace dataset

print("\n" + "=" * 60)
print("Example 3: HuggingFace Dataset Annotation")
print("=" * 60)

from datasets import load_dataset

# Pull the Portuguese TTS dataset straight from the Hub.
dataset = load_dataset('marcosremar2/orpheus-tts-portuguese-dataset', split='train')

# Keep the demo quick: only the first 10 samples are annotated.
results = annotator.annotate_dataset(
    dataset,
    audio_column='audio',
    text_column='text',
    max_samples=10,
)

print(f"\nAnnotated {len(results)} samples from dataset")

# Aggregate summary statistics over the annotated samples.
stats = annotator.get_stats(results)
print("\nStatistics:")
print(f"  Emotion distribution: {stats['emotion_distribution']}")
print(f"  Average confidence: {stats['avg_confidence']:.2%}")
print(f"  Average agreement: {stats['avg_agreement']:.2%}")
print(f"  Events detected: {stats['total_events_detected']}")

# %% Example 4: Save results to file

print("\n" + "=" * 60)
print("Example 4: Save Results")
print("=" * 60)

# Flatten each nested annotation dict into one flat row per sample.
df = pd.DataFrame([
    {
        'sample_id': r.get('sample_id', i),
        'text': r.get('text', ''),
        'emotion': r['emotion']['label'],
        'confidence': r['emotion']['confidence'],
        'agreement': r['emotion']['agreement'],
        'events': ','.join(r['events']['detected'])
    }
    for i, r in enumerate(results)
])

# Save to parquet. Create the output directory first: to_parquet raises
# FileNotFoundError on a fresh checkout where ../data/annotated does not exist.
output_file = Path('../data/annotated/example_annotations.parquet')
output_file.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(output_file)
print(f"✅ Saved {len(df)} annotations to {output_file}")

# %% Example 5: Compare modes (Quick vs Balanced vs Full)

print("\n" + "=" * 60)
print("Example 5: Compare Modes")
print("=" * 60)

import time  # hoisted: re-importing inside the loop body was redundant

modes = ['quick', 'balanced', 'full']
test_audio = np.random.randn(16000 * 3)  # 3 seconds at 16 kHz

for mode in modes:
    print(f"\n{mode.upper()} MODE:")
    print("-" * 40)

    # Fresh annotator per mode so each timing reflects that mode's models only.
    annotator_mode = EnsembleAnnotator(mode=mode, device='cpu')

    # perf_counter is monotonic and high-resolution — the right clock for
    # measuring elapsed intervals (time.time() can jump with clock adjustments).
    start = time.perf_counter()
    result = annotator_mode.annotate(test_audio, sample_rate=16000)
    elapsed = time.perf_counter() - start

    print(f"  Emotion: {result['emotion']['label']}")
    print(f"  Confidence: {result['emotion']['confidence']:.2%}")
    print(f"  Agreement: {result['emotion']['agreement']:.2%}")
    print(f"  Time: {elapsed:.2f}s")

# %% Example 6: Access individual model predictions

print("\n" + "=" * 60)
print("Example 6: Individual Model Predictions")
print("=" * 60)

result = annotator.annotate(dummy_audio, sample_rate=16000)

# The ensemble's winning label plus its aggregate confidence.
emotion = result['emotion']
print("\nEnsemble decision:")
print(f"  Final: {emotion['label']} ({emotion['confidence']:.2%})")

# Per-model breakdown: each member's vote and its weight in the ensemble.
print("\nIndividual predictions:")
for pred in emotion['predictions']:
    print(f"  {pred['model_name']}: {pred['label']} ({pred['confidence']:.2%}) [weight: {pred['model_weight']:.2f}]")

# Raw vote tally behind the weighted decision.
print("\nVoting breakdown:")
for label, n_votes in emotion['votes'].items():
    print(f"  {label}: {n_votes} vote(s)")

# %% Example 7: Use quick annotation function

print("\n" + "=" * 60)
print("Example 7: Quick Annotation Function")
print("=" * 60)

from ensemble_tts import annotate_file

# One-liner convenience API for a file on disk:
# result = annotate_file('audio.wav', mode='balanced', device='cuda')

# Demo equivalent using the in-memory noise clip from Example 1.
annotator_quick = EnsembleAnnotator(mode='quick', device='cpu')
result = annotator_quick.annotate(dummy_audio, sample_rate=16000)

quick_emotion = result['emotion']
print(f"Quick result: {quick_emotion['label']} ({quick_emotion['confidence']:.2%})")

closing = "=" * 60
print("\n" + closing)
print("✅ All examples completed!")
print(closing)