#!/usr/bin/env python3
"""
CUDA Test Script for Speech Transcription App

This script helps users verify their CUDA setup and compare performance
between CPU and GPU configurations.

Usage: python test_cuda.py
"""

import os
import sys
import time

import numpy as np
import torch
from dotenv import load_dotenv


def print_header(title):
    """Print a formatted header"""
    print("\n" + "=" * 60)
    print(f" {title}")
    print("=" * 60)


def print_section(title):
    """Print a formatted section header"""
    print(f"\nšŸ” {title}")
    print("-" * 40)


def test_pytorch_cuda():
    """Test PyTorch CUDA availability and report device properties"""
    print_section("PyTorch CUDA Test")

    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")

    if not torch.cuda.is_available():
        print("āŒ CUDA not available")
        return False

    print(f"CUDA version: {torch.version.cuda}")
    print(f"cuDNN version: {torch.backends.cudnn.version()}")
    print(f"Number of CUDA devices: {torch.cuda.device_count()}")

    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        print(f"Device {i}: {props.name}")
        print(f"  Memory: {props.total_memory / 1e9:.1f} GB")
        print(f"  Compute capability: {props.major}.{props.minor}")

    return True


def test_transformers_device():
    """Test transformers library device selection"""
    print_section("Transformers Device Test")

    try:
        from transformers import pipeline

        # Test with CPU (device=-1). Timing covers pipeline construction plus
        # one inference, and the first run also includes the model download.
        print("Testing CPU pipeline...")
        start_time = time.time()
        pipe_cpu = pipeline(
            "text-classification",
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=-1,
        )
        result_cpu = pipe_cpu("This is a test sentence")
        cpu_time = time.time() - start_time
        print(f"āœ… CPU pipeline loaded and ran in {cpu_time:.2f}s")
        print(f"Result: {result_cpu}")

        # Test with CUDA (device=0) if available
        if torch.cuda.is_available():
            print("\nTesting CUDA pipeline...")
            start_time = time.time()
            pipe_cuda = pipeline(
                "text-classification",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=0,
            )
            result_cuda = pipe_cuda("This is a test sentence")
            cuda_time = time.time() - start_time
            print(f"āœ… CUDA pipeline loaded and ran in {cuda_time:.2f}s")
            print(f"Result: {result_cuda}")

            # Rough indicator only: load time dominates here, and CUDA pays
            # one-time initialization costs on the first call.
            speedup = cpu_time / cuda_time if cuda_time > 0 else 0
            print(f"\nšŸš€ Speedup: {speedup:.2f}x faster with CUDA")

        return True
    except Exception as e:
        print(f"āŒ Error testing transformers: {e}")
        return False


def test_whisper_models():
    """Test Whisper model loading on CPU and CUDA"""
    print_section("Whisper Model Test")

    try:
        from faster_whisper import WhisperModel

        # Load the CPU model (int8 quantization keeps CPU inference fast)
        print("Testing Whisper on CPU...")
        start_time = time.time()
        model_cpu = WhisperModel("tiny.en", device="cpu", compute_type="int8")
        cpu_load_time = time.time() - start_time
        print(f"āœ… CPU model loaded in {cpu_load_time:.2f}s")

        # Load the CUDA model if available (float16 is the usual GPU choice)
        if torch.cuda.is_available():
            print("\nTesting Whisper on CUDA...")
            start_time = time.time()
            try:
                model_cuda = WhisperModel("tiny.en", device="cuda", compute_type="float16")
                cuda_load_time = time.time() - start_time
                print(f"āœ… CUDA model loaded in {cuda_load_time:.2f}s")

                speedup = cpu_load_time / cuda_load_time if cuda_load_time > 0 else 0
                print(f"šŸš€ Load speedup: {speedup:.2f}x faster with CUDA")
            except Exception as e:
                print(f"āŒ Error loading CUDA model: {e}")
                return False

        return True
    except ImportError:
        print("āŒ faster-whisper not installed")
        return False
    except Exception as e:
        print(f"āŒ Error testing Whisper: {e}")
        return False
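
# The test above only measures model *load* time. As a rough sketch of an
# end-to-end check, the helper below times an actual transcribe() call on
# synthetic audio. It is not wired into main(); the 16 kHz mono float32
# input format and the lazy segment generator are faster-whisper conventions.
def benchmark_whisper_transcription(device="cpu", compute_type="int8"):
    """Time one transcription of 5 seconds of synthetic audio (sketch)."""
    from faster_whisper import WhisperModel

    model = WhisperModel("tiny.en", device=device, compute_type=compute_type)
    audio = np.zeros(16000 * 5, dtype=np.float32)  # 5 s of silence at 16 kHz

    start_time = time.time()
    segments, _info = model.transcribe(audio)
    _ = list(segments)  # decoding is lazy; consume the generator to run it
    elapsed = time.time() - start_time
    print(f"Transcribed 5 s of audio on {device} in {elapsed:.2f}s")
    return elapsed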

def test_memory_usage():
    """Test GPU memory allocation and reporting"""
    print_section("GPU Memory Test")

    if not torch.cuda.is_available():
        print("āŒ CUDA not available for memory test")
        return False

    # Get baseline memory usage
    torch.cuda.empty_cache()
    initial_memory = torch.cuda.memory_allocated()
    total_memory = torch.cuda.get_device_properties(0).total_memory

    print(f"Total GPU memory: {total_memory / 1e9:.1f} GB")
    print(f"Initial memory usage: {initial_memory / 1e6:.1f} MB")

    # Allocate a test tensor (1000x1000 float32, about 4 MB)
    try:
        test_tensor = torch.randn(1000, 1000, device="cuda")
        allocated_memory = torch.cuda.memory_allocated()
        print(f"Memory after tensor allocation: {allocated_memory / 1e6:.1f} MB")
        print(f"Available memory: {(total_memory - allocated_memory) / 1e9:.1f} GB")

        # Clean up
        del test_tensor
        torch.cuda.empty_cache()
        print("āœ… Memory test completed")
        return True
    except Exception as e:
        print(f"āŒ Memory test failed: {e}")
        return False


def test_environment_config():
    """Test environment configuration"""
    print_section("Environment Configuration Test")

    # Load the .env file if it exists
    script_dir = os.path.dirname(os.path.abspath(__file__))
    env_file = os.path.join(script_dir, ".env")
    if os.path.exists(env_file):
        load_dotenv(env_file)
        print(f"āœ… Found .env file: {env_file}")
    else:
        print(f"ā„¹ļø No .env file found at: {env_file}")
        print("   Create one from .env.example to configure CUDA usage")

    # Check the USE_CUDA setting
    use_cuda = os.getenv("USE_CUDA", "false").lower() == "true"
    print(f"USE_CUDA environment variable: {os.getenv('USE_CUDA', 'false')}")
    print(f"Parsed USE_CUDA value: {use_cuda}")

    # Test the config import
    try:
        sys.path.append(script_dir)
        from config import config

        print("āœ… Config module imported successfully")

        device_info = config.get_device_info()
        print(f"Selected device: {device_info['device']}")
        print(f"Compute type: {device_info['compute_type']}")
        return True
    except Exception as e:
        print(f"āŒ Error importing config: {e}")
        return False
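
# Reference sketch (assumption): a minimal config object compatible with the
# check above. The app's real config.py may differ; this stand-in only mirrors
# what this script relies on: USE_CUDA parsing and a get_device_info() dict
# with "device" and "compute_type" keys.
class _ExampleConfig:
    """Hypothetical stand-in for the app's config object; not used by the tests."""

    def get_device_info(self):
        use_cuda = os.getenv("USE_CUDA", "false").lower() == "true"
        if use_cuda and torch.cuda.is_available():
            return {"device": "cuda", "compute_type": "float16"}
        return {"device": "cpu", "compute_type": "int8"}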

def run_performance_benchmark():
    """Run a simple matrix-multiplication benchmark"""
    print_section("Performance Benchmark")

    if not torch.cuda.is_available():
        print("āŒ CUDA not available for benchmark")
        return

    # Matrix multiplication benchmark
    size = 2000
    iterations = 5

    print(f"Running {iterations} matrix multiplications ({size}x{size})...")

    # CPU benchmark (perf_counter gives higher-resolution timing than time.time)
    print("\nCPU benchmark:")
    cpu_times = []
    for i in range(iterations):
        a = torch.randn(size, size)
        b = torch.randn(size, size)
        start_time = time.perf_counter()
        _ = torch.mm(a, b)
        cpu_time = time.perf_counter() - start_time
        cpu_times.append(cpu_time)
        print(f"  Iteration {i + 1}: {cpu_time:.3f}s")

    avg_cpu_time = sum(cpu_times) / len(cpu_times)
    print(f"Average CPU time: {avg_cpu_time:.3f}s")

    # CUDA benchmark
    print("\nCUDA benchmark:")

    # Warm-up: the first CUDA op pays one-time initialization costs,
    # so exclude it from the timed iterations
    _ = torch.mm(torch.randn(size, size, device="cuda"),
                 torch.randn(size, size, device="cuda"))
    torch.cuda.synchronize()

    cuda_times = []
    for i in range(iterations):
        a = torch.randn(size, size, device="cuda")
        b = torch.randn(size, size, device="cuda")
        torch.cuda.synchronize()  # Wait for pending GPU work before timing
        start_time = time.perf_counter()
        _ = torch.mm(a, b)
        torch.cuda.synchronize()  # Kernels run asynchronously; wait for completion
        cuda_time = time.perf_counter() - start_time
        cuda_times.append(cuda_time)
        print(f"  Iteration {i + 1}: {cuda_time:.3f}s")

    avg_cuda_time = sum(cuda_times) / len(cuda_times)
    print(f"Average CUDA time: {avg_cuda_time:.3f}s")

    speedup = avg_cpu_time / avg_cuda_time if avg_cuda_time > 0 else 0
    print(f"\nšŸš€ Overall speedup: {speedup:.2f}x faster with CUDA")


def main():
    """Main test function"""
    print_header("CUDA Configuration Test for Speech Transcription App")
    print("This script will test your CUDA setup and help you configure")
    print("the speech transcription app for optimal performance.")

    # Run tests
    tests_passed = 0
    total_tests = 5

    if test_pytorch_cuda():
        tests_passed += 1
    if test_transformers_device():
        tests_passed += 1
    if test_whisper_models():
        tests_passed += 1
    if test_memory_usage():
        tests_passed += 1
    if test_environment_config():
        tests_passed += 1

    # Performance benchmark (optional)
    if torch.cuda.is_available():
        try:
            run_performance_benchmark()
        except Exception as e:
            print(f"āŒ Benchmark failed: {e}")

    # Summary
    print_header("Test Summary")
    print(f"Tests passed: {tests_passed}/{total_tests}")

    if tests_passed == total_tests and torch.cuda.is_available():
        print("šŸŽ‰ All tests passed! Your CUDA setup is working correctly.")
        print("\nTo enable CUDA acceleration:")
        print("1. Create a .env file (copy from .env.example)")
        print("2. Set USE_CUDA=true in the .env file")
        print("3. Run the speech transcription app")
    elif torch.cuda.is_available():
        print("āš ļø Some tests failed. Check the error messages above.")
        print("You may still be able to use CUDA, but with potential issues.")
    else:
        print("ā„¹ļø CUDA not available. The app will run on CPU.")
        print("This is perfectly fine for most use cases!")
        print("\nFor CPU usage (always works):")
        print("1. Create a .env file (copy from .env.example)")
        print("2. Set USE_CUDA=false in the .env file")
        print("3. Run the speech transcription app")


if __name__ == "__main__":
    main()