File size: 8,841 Bytes

98938e3

"""
Quick test script for OPTION A ensemble.

Tests:
1. Model loading
2. Single audio annotation
3. Batch processing
4. Performance benchmarking
"""

import sys
import logging
import time
import numpy as np
from pathlib import Path

# Prepend the directory two levels above this script's own directory to
# sys.path so the `ensemble_tts` import below can be resolved
# (presumably that directory is the repository root -- TODO confirm).
# This must run before the import that follows.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from ensemble_tts import EnsembleAnnotator

# Configure the root logger at INFO level; every test below reports its
# progress and results through this module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_model_loading():
    """Test 1: Model Loading

    Builds an ``EnsembleAnnotator`` in ``'quick'`` mode on CPU with events
    disabled, calls ``load_models()`` and logs how long the load took.

    Returns:
        tuple: ``(annotator, True)`` when construction and loading succeed,
        ``(None, False)`` when either of them raises.
    """
    logger.info("=" * 60)
    logger.info("TEST 1: Model Loading")
    logger.info("=" * 60)

    try:
        annotator = EnsembleAnnotator(
            mode='quick',  # Start with quick mode for faster testing
            device='cpu',
            enable_events=False  # Disable events for faster testing
        )

        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        annotator.load_models()
        elapsed = time.perf_counter() - start

        logger.info(f"✅ Models loaded successfully in {elapsed:.2f}s")
        return annotator, True
    except Exception as e:
        logger.error(f"❌ Model loading failed: {e}")
        # Print the stack trace like the other tests in this file do, so a
        # loading failure can actually be diagnosed from the output.
        import traceback
        traceback.print_exc()
        return None, False


def test_single_annotation(annotator):
    """Test 2: Single Audio Annotation

    Annotates three seconds of random noise (16 kHz, float32) and checks that
    the result has an ``'emotion'`` entry containing a ``'label'`` and a
    ``'confidence'`` within ``[0, 1]``.

    Args:
        annotator: Object exposing ``annotate(audio, sample_rate=...)``.

    Returns:
        bool: True when annotation runs and the result passes validation,
        False when anything raises or a validation check fails.
    """
    logger.info("\n" + "=" * 60)
    logger.info("TEST 2: Single Audio Annotation")
    logger.info("=" * 60)

    try:
        # Generate dummy audio (3 seconds)
        audio = np.random.randn(16000 * 3).astype(np.float32)

        start = time.perf_counter()
        result = annotator.annotate(audio, sample_rate=16000)
        elapsed = time.perf_counter() - start

        # Validate result structure with explicit raises rather than
        # ``assert`` statements: asserts are removed under ``python -O``,
        # which would silently turn this test into a no-op. Validation also
        # runs before the report below so a missing key yields a clear
        # message instead of a bare KeyError from the log formatting.
        if 'emotion' not in result:
            raise AssertionError("result has no 'emotion' entry")
        emotion = result['emotion']
        for key in ('label', 'confidence'):
            if key not in emotion:
                raise AssertionError(f"result['emotion'] has no '{key}' entry")
        confidence = emotion['confidence']
        if not 0 <= confidence <= 1:
            raise AssertionError(
                f"confidence {confidence!r} is outside the range [0, 1]"
            )

        logger.info("\n📊 Annotation Result:")
        logger.info(f"  Emotion: {emotion['label']}")
        logger.info(f"  Confidence: {confidence:.2%}")
        logger.info(f"  Agreement: {emotion['agreement']:.2%}")
        logger.info(f"  Votes: {emotion['votes']}")
        logger.info(f"  Time: {elapsed:.2f}s")

        logger.info("\n✅ Single annotation successful")
        return True
    except Exception as e:
        logger.error(f"❌ Single annotation failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_batch_processing(annotator):
    """Test 3: Batch Processing

    Annotates five random-noise clips of increasing length (1 to 5 seconds at
    16 kHz) in one ``annotate_batch`` call, logs each result and checks that
    one result came back per input.

    Args:
        annotator: Object exposing
            ``annotate_batch(audios, sample_rates=...)``.

    Returns:
        bool: True when the batch call succeeds and returns exactly one
        result per clip, False when anything raises or the count differs.
    """
    logger.info("\n" + "=" * 60)
    logger.info("TEST 3: Batch Processing")
    logger.info("=" * 60)

    try:
        # Generate 5 dummy audio samples
        batch_size = 5
        audios = [
            np.random.randn(16000 * (i + 1)).astype(np.float32)
            for i in range(batch_size)
        ]

        start = time.perf_counter()
        results = annotator.annotate_batch(audios, sample_rates=[16000] * batch_size)
        elapsed = time.perf_counter() - start

        logger.info("\n📊 Batch Results:")
        for i, result in enumerate(results):
            logger.info(f"  Sample {i+1}: {result['emotion']['label']} ({result['emotion']['confidence']:.2%})")

        logger.info(f"\n  Total time: {elapsed:.2f}s")
        logger.info(f"  Average time per sample: {elapsed/batch_size:.2f}s")

        # Validate with an explicit raise: an ``assert`` would be stripped
        # under ``python -O`` and the count would never be checked.
        if len(results) != batch_size:
            raise AssertionError(
                f"Expected {batch_size} results, got {len(results)}"
            )

        logger.info("\n✅ Batch processing successful")
        return True
    except Exception as e:
        logger.error(f"❌ Batch processing failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_balanced_mode():
    """Test 4: Balanced Mode (OPTION A)

    Builds a separate ``EnsembleAnnotator`` in ``'balanced'`` mode on CPU,
    loads its models, annotates three seconds of random noise and checks that
    the result carries exactly three entries under
    ``result['emotion']['predictions']``.

    Returns:
        bool: True when loading and annotation succeed and three predictions
        are present, False when anything raises or the count differs.
    """
    logger.info("\n" + "=" * 60)
    logger.info("TEST 4: Balanced Mode (OPTION A)")
    logger.info("=" * 60)

    try:
        annotator_balanced = EnsembleAnnotator(
            mode='balanced',  # 3 models
            device='cpu',
            enable_events=False
        )

        start = time.perf_counter()
        annotator_balanced.load_models()
        load_time = time.perf_counter() - start
        logger.info(f"  Load time: {load_time:.2f}s")

        # Test annotation
        audio = np.random.randn(16000 * 3).astype(np.float32)

        start = time.perf_counter()
        result = annotator_balanced.annotate(audio, sample_rate=16000)
        annotate_time = time.perf_counter() - start

        emotion = result['emotion']
        num_predictions = len(emotion['predictions'])

        logger.info("\n📊 Balanced Mode Result:")
        logger.info(f"  Emotion: {emotion['label']}")
        logger.info(f"  Confidence: {emotion['confidence']:.2%}")
        logger.info(f"  Agreement: {emotion['agreement']:.2%}")
        logger.info(f"  Number of predictions: {num_predictions}")
        logger.info(f"  Annotation time: {annotate_time:.2f}s")

        # Should have 3 model predictions (OPTION A). Raised explicitly
        # because an ``assert`` is removed under ``python -O``, which would
        # drop the one check this test exists for.
        if num_predictions != 3:
            raise AssertionError(
                f"Expected 3 predictions, got {num_predictions}"
            )

        logger.info("\n✅ Balanced mode (OPTION A) successful")
        return True
    except Exception as e:
        logger.error(f"❌ Balanced mode failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def benchmark_modes():
    """Test 5: Benchmark All Modes

    For each of the ``'quick'`` and ``'balanced'`` modes, builds a fresh
    ``EnsembleAnnotator`` on CPU, times ``load_models()`` and averages the
    duration of three ``annotate`` calls on the same three-second random
    clip. A summary of all modes is logged at the end.

    Returns:
        bool: True when every mode was benchmarked without raising, False
        when at least one mode failed. Previously this always returned True,
        so a crashed benchmark was still reported as a passing test.
    """
    import traceback

    logger.info("\n" + "=" * 60)
    logger.info("TEST 5: Performance Benchmark")
    logger.info("=" * 60)

    modes = ['quick', 'balanced']
    audio = np.random.randn(16000 * 3).astype(np.float32)

    results = {}

    for mode in modes:
        logger.info(f"\n📊 Testing {mode.upper()} mode...")

        try:
            annotator = EnsembleAnnotator(
                mode=mode,
                device='cpu',
                enable_events=False
            )

            # Load time (perf_counter is monotonic, unlike time.time)
            start = time.perf_counter()
            annotator.load_models()
            load_time = time.perf_counter() - start

            # Annotation time (average of 3 runs)
            times = []
            for _ in range(3):
                start = time.perf_counter()
                result = annotator.annotate(audio, sample_rate=16000)
                times.append(time.perf_counter() - start)

            avg_time = np.mean(times)

            results[mode] = {
                'load_time': load_time,
                'avg_annotation_time': avg_time,
                # The last run's result is used to count the predictions.
                'num_models': len(result['emotion']['predictions'])
            }

            logger.info(f"  Load time: {load_time:.2f}s")
            logger.info(f"  Avg annotation time: {avg_time:.2f}s")
            logger.info(f"  Models: {results[mode]['num_models']}")

        except Exception as e:
            # Keep going with the remaining modes, but record the failure
            # and show the stack trace like the other tests in this file.
            logger.error(f"  ❌ {mode} mode failed: {e}")
            traceback.print_exc()
            results[mode] = {'error': str(e)}

    # Summary
    logger.info("\n" + "=" * 60)
    logger.info("BENCHMARK SUMMARY")
    logger.info("=" * 60)

    for mode, metrics in results.items():
        if 'error' in metrics:
            # Make failed modes visible in the summary instead of omitting
            # them silently.
            logger.error(f"\n{mode.upper()} MODE: FAILED ({metrics['error']})")
            continue
        logger.info(f"\n{mode.upper()} MODE:")
        logger.info(f"  Models: {metrics['num_models']}")
        logger.info(f"  Load: {metrics['load_time']:.2f}s")
        logger.info(f"  Annotation: {metrics['avg_annotation_time']:.2f}s/sample")

    return all('error' not in metrics for metrics in results.values())


def main():
    """Drive the full quick-test sequence and summarise the outcome.

    Model loading runs first; if it fails the remaining tests are skipped and
    the function returns immediately. Otherwise the single-annotation, batch,
    balanced-mode and benchmark tests run in that order and a PASS/FAIL line
    is logged for each of them.

    Returns:
        bool: True only when every test reported success.
    """
    rule = "=" * 60

    logger.info("\n" + rule)
    logger.info("ENSEMBLE TTS ANNOTATION - QUICK TEST")
    logger.info("OPTION A - Balanced Mode (3 models)")
    logger.info(rule)

    # Nothing else can run without a loaded annotator, so bail out early.
    annotator, loaded = test_model_loading()
    if not loaded:
        logger.error("\n❌ Model loading failed. Cannot continue tests.")
        return False

    # Insertion order doubles as the order of the summary lines below.
    outcomes = {'model_loading': loaded}
    outcomes['single_annotation'] = test_single_annotation(annotator)
    outcomes['batch_processing'] = test_batch_processing(annotator)
    outcomes['balanced_mode'] = test_balanced_mode()
    outcomes['benchmark'] = benchmark_modes()

    logger.info("\n" + rule)
    logger.info("TEST SUMMARY")
    logger.info(rule)

    for name, passed in outcomes.items():
        if passed:
            verdict = "✅ PASS"
        else:
            verdict = "❌ FAIL"
        logger.info(f"  {name}: {verdict}")

    everything_passed = all(outcomes.values())

    if not everything_passed:
        logger.error("\n❌ SOME TESTS FAILED")
        logger.error("\nPlease check the logs above for details.")
    else:
        logger.info("\n🎉 ALL TESTS PASSED!")
        logger.info("\nSystem is ready for production use.")

    logger.info("\n" + rule)

    return everything_passed


if __name__ == "__main__":
    # Translate the boolean test outcome into a conventional process exit
    # status: 0 when everything passed, 1 otherwise.
    success = main()
    if success:
        sys.exit(0)
    sys.exit(1)