"""
Quick test script for OPTION A ensemble.
Tests:
1. Model loading
2. Single audio annotation
3. Batch processing
4. Performance benchmarking
"""
import sys
import logging
import time
import numpy as np
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from ensemble_tts import EnsembleAnnotator
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
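
# Expected shape of a single annotation result, inferred from the fields the
# tests below access (a sketch, not the authoritative ensemble_tts schema):
# {
#     'emotion': {
#         'label': str,            # winning emotion label
#         'confidence': float,     # in [0, 1]
#         'agreement': float,      # inter-model agreement
#         'votes': ...,            # model votes (structure not asserted here)
#         'predictions': [...],    # one entry per ensemble model
#     },
#     ...
# }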


def test_model_loading():
    """Test 1: Model Loading"""
    logger.info("=" * 60)
    logger.info("TEST 1: Model Loading")
    logger.info("=" * 60)
    try:
        annotator = EnsembleAnnotator(
            mode='quick',         # Start with quick mode for faster testing
            device='cpu',
            enable_events=False   # Disable events for faster testing
        )
        start = time.time()
        annotator.load_models()
        elapsed = time.time() - start
        logger.info(f"βœ… Models loaded successfully in {elapsed:.2f}s")
        return annotator, True
    except Exception as e:
        logger.error(f"❌ Model loading failed: {e}")
        return None, False


def test_single_annotation(annotator):
    """Test 2: Single Audio Annotation"""
    logger.info("\n" + "=" * 60)
    logger.info("TEST 2: Single Audio Annotation")
    logger.info("=" * 60)
    try:
        # Generate dummy audio (3 seconds)
        audio = np.random.randn(16000 * 3).astype(np.float32)
        start = time.time()
        result = annotator.annotate(audio, sample_rate=16000)
        elapsed = time.time() - start
        logger.info("\nπŸ“Š Annotation Result:")
        logger.info(f" Emotion: {result['emotion']['label']}")
        logger.info(f" Confidence: {result['emotion']['confidence']:.2%}")
        logger.info(f" Agreement: {result['emotion']['agreement']:.2%}")
        logger.info(f" Votes: {result['emotion']['votes']}")
        logger.info(f" Time: {elapsed:.2f}s")
        # Validate result structure
        assert 'emotion' in result
        assert 'label' in result['emotion']
        assert 'confidence' in result['emotion']
        assert 0 <= result['emotion']['confidence'] <= 1
        logger.info("\nβœ… Single annotation successful")
        return True
    except Exception as e:
        logger.error(f"❌ Single annotation failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_batch_processing(annotator):
    """Test 3: Batch Processing"""
    logger.info("\n" + "=" * 60)
    logger.info("TEST 3: Batch Processing")
    logger.info("=" * 60)
    try:
        # Generate 5 dummy audio samples
        batch_size = 5
        audios = [np.random.randn(16000 * (i + 1)).astype(np.float32) for i in range(batch_size)]
        start = time.time()
        results = annotator.annotate_batch(audios, sample_rates=[16000] * batch_size)
        elapsed = time.time() - start
        logger.info("\nπŸ“Š Batch Results:")
        for i, result in enumerate(results):
            logger.info(f" Sample {i+1}: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})")
        logger.info(f"\n Total time: {elapsed:.2f}s")
        logger.info(f" Average time per sample: {elapsed/batch_size:.2f}s")
        # Validate
        assert len(results) == batch_size
        logger.info("\nβœ… Batch processing successful")
        return True
    except Exception as e:
        logger.error(f"❌ Batch processing failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_balanced_mode():
    """Test 4: Balanced Mode (OPTION A)"""
    logger.info("\n" + "=" * 60)
    logger.info("TEST 4: Balanced Mode (OPTION A)")
    logger.info("=" * 60)
    try:
        annotator_balanced = EnsembleAnnotator(
            mode='balanced',      # 3 models
            device='cpu',
            enable_events=False
        )
        start = time.time()
        annotator_balanced.load_models()
        load_time = time.time() - start
        logger.info(f" Load time: {load_time:.2f}s")
        # Test annotation
        audio = np.random.randn(16000 * 3).astype(np.float32)
        start = time.time()
        result = annotator_balanced.annotate(audio, sample_rate=16000)
        annotate_time = time.time() - start
        logger.info("\nπŸ“Š Balanced Mode Result:")
        logger.info(f" Emotion: {result['emotion']['label']}")
        logger.info(f" Confidence: {result['emotion']['confidence']:.2%}")
        logger.info(f" Agreement: {result['emotion']['agreement']:.2%}")
        logger.info(f" Number of predictions: {len(result['emotion']['predictions'])}")
        logger.info(f" Annotation time: {annotate_time:.2f}s")
        # Should have 3 model predictions (OPTION A)
        assert len(result['emotion']['predictions']) == 3, \
            f"Expected 3 predictions, got {len(result['emotion']['predictions'])}"
        logger.info("\nβœ… Balanced mode (OPTION A) successful")
        return True
    except Exception as e:
        logger.error(f"❌ Balanced mode failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def benchmark_modes():
    """Test 5: Benchmark All Modes"""
    logger.info("\n" + "=" * 60)
    logger.info("TEST 5: Performance Benchmark")
    logger.info("=" * 60)
    modes = ['quick', 'balanced']
    audio = np.random.randn(16000 * 3).astype(np.float32)
    results = {}
    for mode in modes:
        logger.info(f"\nπŸ“Š Testing {mode.upper()} mode...")
        try:
            annotator = EnsembleAnnotator(
                mode=mode,
                device='cpu',
                enable_events=False
            )
            # Load time
            start = time.time()
            annotator.load_models()
            load_time = time.time() - start
            # Annotation time (average of 3 runs)
            times = []
            for _ in range(3):
                start = time.time()
                result = annotator.annotate(audio, sample_rate=16000)
                times.append(time.time() - start)
            avg_time = np.mean(times)
            results[mode] = {
                'load_time': load_time,
                'avg_annotation_time': avg_time,
                'num_models': len(result['emotion']['predictions'])
            }
            logger.info(f" Load time: {load_time:.2f}s")
            logger.info(f" Avg annotation time: {avg_time:.2f}s")
            logger.info(f" Models: {results[mode]['num_models']}")
        except Exception as e:
            logger.error(f" ❌ {mode} mode failed: {e}")
            results[mode] = {'error': str(e)}
    # Summary
    logger.info("\n" + "=" * 60)
    logger.info("BENCHMARK SUMMARY")
    logger.info("=" * 60)
    for mode, metrics in results.items():
        if 'error' not in metrics:
            logger.info(f"\n{mode.upper()} MODE:")
            logger.info(f" Models: {metrics['num_models']}")
            logger.info(f" Load: {metrics['load_time']:.2f}s")
            logger.info(f" Annotation: {metrics['avg_annotation_time']:.2f}s/sample")
    return True


def main():
    """Run all tests"""
    logger.info("\n" + "=" * 60)
    logger.info("ENSEMBLE TTS ANNOTATION - QUICK TEST")
    logger.info("OPTION A - Balanced Mode (3 models)")
    logger.info("=" * 60)
    results = {
        'model_loading': False,
        'single_annotation': False,
        'batch_processing': False,
        'balanced_mode': False,
        'benchmark': False
    }
    # Test 1: Model Loading
    annotator, success = test_model_loading()
    results['model_loading'] = success
    if not success:
        logger.error("\n❌ Model loading failed. Cannot continue tests.")
        return False
    # Test 2: Single Annotation
    results['single_annotation'] = test_single_annotation(annotator)
    # Test 3: Batch Processing
    results['batch_processing'] = test_batch_processing(annotator)
    # Test 4: Balanced Mode
    results['balanced_mode'] = test_balanced_mode()
    # Test 5: Benchmark
    results['benchmark'] = benchmark_modes()
    # Summary
    logger.info("\n" + "=" * 60)
    logger.info("TEST SUMMARY")
    logger.info("=" * 60)
    for test_name, success in results.items():
        status = "βœ… PASS" if success else "❌ FAIL"
        logger.info(f" {test_name}: {status}")
    all_passed = all(results.values())
    if all_passed:
        logger.info("\nπŸŽ‰ ALL TESTS PASSED!")
        logger.info("\nSystem is ready for production use.")
    else:
        logger.error("\n❌ SOME TESTS FAILED")
        logger.error("\nPlease check the logs above for details.")
    logger.info("\n" + "=" * 60)
    return all_passed
if __name__ == "__main__":
success = main()
sys.exit(0 if success else 1)