# Source: ensemble-tts-annotation / notebooks / quickstart_example.py
# (GitHub page header commented out so the file is valid Python:
#  author "marcosremar", commit "Add complete infrastructure and
#  updated README for OPTION A", fe63a26)
"""
Quickstart Example - Ensemble TTS Annotation
This notebook demonstrates how to use the EnsembleAnnotator for Portuguese BR
TTS annotation with OPTION A (3-model ensemble).
"""
# %% Install dependencies (if needed)
# !pip install -r ../requirements.txt
# %% Imports
from ensemble_tts import EnsembleAnnotator
import librosa
import pandas as pd
from pathlib import Path
# %% Example 1: Annotate a single audio file
print("=" * 60)
print("Example 1: Single Audio File Annotation")
print("=" * 60)

# Build the OPTION A (balanced) ensemble: emotion2vec (fine-tuned)
# + Whisper + SenseVoice, combined by weighted voting.
annotator = EnsembleAnnotator(
    mode='balanced',
    device='cpu',  # switch to 'cuda' when a GPU is available
    voting_strategy='weighted',
    enable_events=True
)

# Real usage would pass a file path:
# result = annotator.annotate('path/to/audio.wav')

# Demo: one second of synthetic noise at 16 kHz instead of a real file.
import numpy as np
dummy_audio = np.random.randn(16000)
result = annotator.annotate(dummy_audio, sample_rate=16000)

emotion = result['emotion']
print("\nResult:")
print(f"Emotion: {emotion['label']}")
print(f"Confidence: {emotion['confidence']:.2%}")
print(f"Agreement: {emotion['agreement']:.2%}")
print(f"Events detected: {result['events']['detected']}")
# %% Example 2: Annotate multiple files
print("\n" + "=" * 60)
print("Example 2: Batch Annotation")
print("=" * 60)

# Real usage would pass a list of audio file paths, e.g.:
# audio_files = ['audio1.wav', 'audio2.wav', 'audio3.wav']
# results = annotator.annotate_batch(audio_files)
# (kept as a comment: the original defined `audio_files` but never used it)

# Demo: three synthetic clips of 1, 2 and 3 seconds at 16 kHz.
dummy_audios = [np.random.randn(16000 * (i + 1)) for i in range(3)]

# Annotate all clips in one call; one sample rate per clip.
results = annotator.annotate_batch(
    dummy_audios,
    sample_rates=[16000] * 3
)

print(f"\nAnnotated {len(results)} files")
for i, result in enumerate(results):
    print(f" File {i+1}: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})")
# %% Example 3: Annotate HuggingFace dataset
print("\n" + "=" * 60)
print("Example 3: HuggingFace Dataset Annotation")
print("=" * 60)

from datasets import load_dataset

# Pull the Portuguese TTS dataset from the HuggingFace Hub.
dataset = load_dataset('marcosremar2/orpheus-tts-portuguese-dataset', split='train')

# Keep the demo fast: only the first 10 samples are annotated.
results = annotator.annotate_dataset(
    dataset,
    audio_column='audio',
    text_column='text',
    max_samples=10
)
print(f"\nAnnotated {len(results)} samples from dataset")

# Summarize the batch and report aggregate metrics.
stats = annotator.get_stats(results)
summary_lines = [
    f" Emotion distribution: {stats['emotion_distribution']}",
    f" Average confidence: {stats['avg_confidence']:.2%}",
    f" Average agreement: {stats['avg_agreement']:.2%}",
    f" Events detected: {stats['total_events_detected']}",
]
print("\nStatistics:")
for line in summary_lines:
    print(line)
# %% Example 4: Save results to file
print("\n" + "=" * 60)
print("Example 4: Save Results")
print("=" * 60)
# Convert to DataFrame
df = pd.DataFrame([
{
'sample_id': r.get('sample_id', i),
'text': r.get('text', ''),
'emotion': r['emotion']['label'],
'confidence': r['emotion']['confidence'],
'agreement': r['emotion']['agreement'],
'events': ','.join(r['events']['detected'])
}
for i, r in enumerate(results)
])
# Save to parquet
output_file = '../data/annotated/example_annotations.parquet'
df.to_parquet(output_file)
print(f"✅ Saved {len(df)} annotations to {output_file}")
# %% Example 5: Compare modes (Quick vs Balanced vs Full)
print("\n" + "=" * 60)
print("Example 5: Compare Modes")
print("=" * 60)

import time  # hoisted: the original re-imported `time` on every loop iteration

modes = ['quick', 'balanced', 'full']
test_audio = np.random.randn(16000 * 3)  # 3 seconds at 16 kHz

for mode in modes:
    print(f"\n{mode.upper()} MODE:")
    print("-" * 40)

    # Fresh annotator per mode; the timing below covers inference only,
    # not model construction.
    annotator_mode = EnsembleAnnotator(mode=mode, device='cpu')

    start = time.time()
    result = annotator_mode.annotate(test_audio, sample_rate=16000)
    elapsed = time.time() - start

    print(f" Emotion: {result['emotion']['label']}")
    print(f" Confidence: {result['emotion']['confidence']:.2%}")
    print(f" Agreement: {result['emotion']['agreement']:.2%}")
    print(f" Time: {elapsed:.2f}s")
# %% Example 6: Access individual model predictions
print("\n" + "=" * 60)
print("Example 6: Individual Model Predictions")
print("=" * 60)

result = annotator.annotate(dummy_audio, sample_rate=16000)
emotion = result['emotion']

# Final label after weighted voting across all models.
print("\nEnsemble decision:")
print(f" Final: {emotion['label']} ({emotion['confidence']:.2%})")

# Per-model labels, confidences and ensemble weights.
print("\nIndividual predictions:")
for pred in emotion['predictions']:
    print(f" {pred['model_name']}: {pred['label']} ({pred['confidence']:.2%}) [weight: {pred['model_weight']:.2f}]")

# Raw vote counts per emotion label.
print("\nVoting breakdown:")
for label, n_votes in emotion['votes'].items():
    print(f" {label}: {n_votes} vote(s)")
# %% Example 7: Use quick annotation function
print("\n" + "=" * 60)
print("Example 7: Quick Annotation Function")
print("=" * 60)

from ensemble_tts import annotate_file

# One-liner for real files:
# result = annotate_file('audio.wav', mode='balanced', device='cuda')

# Demo equivalent using the class API on the synthetic clip.
annotator_quick = EnsembleAnnotator(mode='quick', device='cpu')
result = annotator_quick.annotate(dummy_audio, sample_rate=16000)
print(f"Quick result: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})")

print("\n" + "=" * 60)
print("✅ All examples completed!")
print("=" * 60)