|
|
""" |
|
|
Simple audio test without loading large models. |
|
|
|
|
|
Tests the annotation pipeline with mock predictions to validate |
|
|
the voting and aggregation logic without downloading models. |
|
|
""" |
|
|
|
|
|
import logging |
|
|
import sys |
|
|
from pathlib import Path |
|
|
import numpy as np |
|
|
|
|
|
# Put the directory three levels above this file (presumably the repository
# root -- confirm against the project layout) at the front of sys.path. This
# must run before the ensemble_tts import below so that import can resolve
# when the script is executed directly.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from ensemble_tts.voting import WeightedVoting, MajorityVoting
# NOTE(review): load_from_disk is not referenced in the visible part of this
# file; confirm whether the import is still needed.
from datasets import load_from_disk

# Emit bare messages (no level/timestamp prefix) at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
def test_voting_strategies():
    """Run MajorityVoting and WeightedVoting on fixed mock predictions.

    Three hand-written predictions (two "happy", one "neutral") are passed to
    each voter's ``vote(predictions, key="label")`` and the returned fields
    are logged. Nothing is asserted: the function fails only if a voter
    raises or its result lacks one of the logged keys.
    """
    banner = "=" * 60
    logger.info("\n" + banner)
    logger.info("🗳️ Testing Voting Strategies")
    logger.info(banner)

    predictions = [
        {"label": "happy", "confidence": 0.8, "model_name": "emotion2vec", "model_weight": 0.5},
        {"label": "happy", "confidence": 0.7, "model_name": "whisper", "model_weight": 0.3},
        {"label": "neutral", "confidence": 0.6, "model_name": "sensevoice", "model_weight": 0.2},
    ]

    # NOTE(review): the leading glyph of this heading is mis-encoded and the
    # intended emoji cannot be recovered from it; kept verbatim -- confirm.
    logger.info("\nπ Majority Voting:")
    majority_result = MajorityVoting().vote(predictions, key="label")
    logger.info(f" Winner: {majority_result['label']}")
    logger.info(f" Confidence: {majority_result['confidence']:.2%}")
    logger.info(f" Votes: {majority_result['votes']}")

    logger.info("\n⚖️ Weighted Voting:")
    weighted_result = WeightedVoting().vote(predictions, key="label")
    logger.info(f" Winner: {weighted_result['label']}")
    logger.info(f" Confidence: {weighted_result['confidence']:.2%}")
    logger.info(f" Weighted votes: {weighted_result['weighted_votes']}")

    logger.info("\n✅ Voting strategies working correctly!")
|
|
|
|
|
|
|
|
def test_synthetic_dataset():
    """Summarise the synthetic dataset on disk and run mock voting on a few clips.

    Looks for ``data/raw/synthetic/<emotion>/*.wav``. If the directory is
    missing, a warning and the command to create it are logged and the
    function returns. Otherwise it logs the number of WAV files per emotion
    sub-directory, then picks one random file per non-empty emotion, samples
    up to three of those, reads each with soundfile, logs basic audio
    statistics, and feeds three mock predictions carrying the directory's
    label through ``WeightedVoting``. Nothing is asserted.
    """
    dataset_path = Path("data/raw/synthetic")

    if not dataset_path.exists():
        logger.warning(f"⚠️ Dataset not found: {dataset_path}")
        logger.info("Create it with:")
        logger.info(" python scripts/data/create_synthetic_test_data.py")
        return

    banner = "=" * 60
    logger.info("\n" + banner)
    logger.info("📦 Testing Synthetic Dataset")
    logger.info(banner)

    logger.info(f"\n Dataset location: {dataset_path}")

    # Scan the directory tree once; both the summary and the sampling below
    # work from this mapping of emotion name -> list of WAV paths.
    wavs_by_emotion = {
        emotion_dir.name: list(emotion_dir.glob("*.wav"))
        for emotion_dir in dataset_path.iterdir()
        if emotion_dir.is_dir()
    }

    logger.info("\n Emotion distribution:")
    total = sum(len(files) for files in wavs_by_emotion.values())
    for emotion, files in sorted(wavs_by_emotion.items()):
        logger.info(f" {emotion:12s}: {len(files):3d} samples")
    logger.info(f" {'TOTAL':12s}: {total:3d} samples")

    logger.info("\n Testing 3 random audio files:")
    # Imported here, after the existence check, so a missing dataset can be
    # reported even when soundfile is not installed.
    import random

    import soundfile as sf

    # One random candidate per emotion that has at least one file.
    test_files = [
        (emotion, random.choice(files))
        for emotion, files in wavs_by_emotion.items()
        if files
    ]
    chosen = random.sample(test_files, min(3, len(test_files)))

    for i, (emotion, audio_file) in enumerate(chosen, 1):
        audio_array, sr = sf.read(audio_file)

        logger.info(f"\n Sample {i}: {audio_file.name}")
        logger.info(f" True emotion: {emotion}")
        logger.info(f" Audio: {len(audio_array)/sr:.2f}s @ {sr}Hz")
        logger.info(f" Shape: {audio_array.shape}")
        logger.info(f" Range: [{audio_array.min():.3f}, {audio_array.max():.3f}]")

        # Every mock prediction carries the directory's label.
        mock_predictions = [
            {"label": emotion, "confidence": 0.85, "model_name": "mock_model1", "model_weight": 0.5},
            {"label": emotion, "confidence": 0.75, "model_name": "mock_model2", "model_weight": 0.3},
            {"label": emotion, "confidence": 0.65, "model_name": "mock_model3", "model_weight": 0.2},
        ]

        voter = WeightedVoting()
        result = voter.vote(mock_predictions, key="label")
        logger.info(f" Predicted: {result['label']} ({result['confidence']:.2%})")
        logger.info(" ✅ Match!" if result['label'] == emotion else " ❌ No match")

    logger.info("\n✅ Dataset test complete!")
|
|
|
|
|
|
|
|
def test_audio_features():
    """Load one synthetic clip and log a handful of librosa features.

    Reads ``data/raw/synthetic/happy/happy_000.wav`` with soundfile and logs
    its sample rate, duration, shape and value range, then logs RMS energy,
    zero-crossing rate, spectral centroid and 13 MFCCs computed by librosa.
    If the file is missing, a warning is logged and the function returns.
    Nothing is asserted.
    """
    banner = "=" * 60
    logger.info("\n" + banner)
    logger.info("🎵 Testing Audio Features")
    logger.info(banner)

    import soundfile as sf

    test_audio = Path("data/raw/synthetic/happy/happy_000.wav")
    if not test_audio.exists():
        logger.warning(f"⚠️ Test audio not found: {test_audio}")
        return

    logger.info(f"\n Loading: {test_audio}")
    audio, sr = sf.read(test_audio)

    logger.info(f" Sample rate: {sr}Hz")
    logger.info(f" Duration: {len(audio)/sr:.2f}s")
    logger.info(f" Shape: {audio.shape}")
    logger.info(f" Range: [{audio.min():.3f}, {audio.max():.3f}]")

    # librosa is imported only once the clip is known to exist.
    import librosa

    logger.info("\n Extracting features...")

    # soundfile returns multi-channel audio as (frames, channels), whereas
    # librosa treats the last axis as time. Average the channels so the
    # features are computed along samples; mono input is used unchanged.
    mono = audio.mean(axis=1) if audio.ndim > 1 else audio

    rms = librosa.feature.rms(y=mono)[0]
    logger.info(f" RMS energy: mean={rms.mean():.4f}, std={rms.std():.4f}")

    zcr = librosa.feature.zero_crossing_rate(mono)[0]
    logger.info(f" Zero-crossing rate: mean={zcr.mean():.4f}")

    spectral_centroid = librosa.feature.spectral_centroid(y=mono, sr=sr)[0]
    logger.info(f" Spectral centroid: mean={spectral_centroid.mean():.1f}Hz")

    mfccs = librosa.feature.mfcc(y=mono, sr=sr, n_mfcc=13)
    logger.info(f" MFCCs shape: {mfccs.shape}")
    logger.info(f" MFCC[0] mean: {mfccs[0].mean():.2f}")

    logger.info("\n✅ Audio features extracted successfully!")
|
|
|
|
|
|
|
|
def main() -> int:
    """Run the three checks in order and report the overall outcome.

    Calls ``test_voting_strategies``, ``test_synthetic_dataset`` and
    ``test_audio_features``. Any exception raised by them is logged with its
    traceback.

    Returns:
        0 if all three calls complete without raising, 1 otherwise.
    """
    banner = "=" * 60
    logger.info("\n" + banner)
    logger.info("🧪 Simple Audio Test Suite")
    logger.info(banner)
    logger.info("\nThis test validates the annotation pipeline without loading")
    logger.info("large models, using mock predictions and synthetic data.")

    # Only the test calls are guarded; the reporting below cannot mask or be
    # mistaken for a test failure.
    try:
        test_voting_strategies()
        test_synthetic_dataset()
        test_audio_features()
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("\n❌ Test failed: %s", e)
        return 1

    logger.info("\n" + banner)
    logger.info("✅ ALL TESTS PASSED!")
    logger.info(banner)

    # NOTE(review): the leading glyph of this heading is mis-encoded and the
    # intended emoji cannot be recovered from it; kept verbatim -- confirm.
    logger.info("\nπ Next Steps:")
    logger.info(" 1. Run fine-tuning with SkyPilot:")
    logger.info(" sky launch scripts/cloud/skypilot_finetune.yaml")
    logger.info("\n 2. Or test locally with real models (requires GPU):")
    logger.info(" python scripts/test/test_quick.py")
    logger.info("\n 3. Annotate complete dataset:")
    logger.info(" sky launch scripts/cloud/skypilot_annotate_orpheus.yaml")

    return 0
|
|
|
|
|
|
|
|
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
|