#!/usr/bin/env python3
"""
CUDA Test Script for Speech Transcription App
This script helps users verify their CUDA setup and compare performance
between CPU and GPU configurations.
Usage:
python test_cuda.py
"""
import os
import sys
import time
import torch
import numpy as np
from dotenv import load_dotenv
def print_header(title):
"""Print a formatted header"""
print("\n" + "=" * 60)
print(f" {title}")
print("=" * 60)
def print_section(title):
"""Print a formatted section header"""
print(f"\nπŸ” {title}")
print("-" * 40)
def test_pytorch_cuda():
"""Test PyTorch CUDA availability and performance"""
print_section("PyTorch CUDA Test")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
print(f"CUDA version: {torch.version.cuda}")
print(f"cuDNN version: {torch.backends.cudnn.version()}")
print(f"Number of CUDA devices: {torch.cuda.device_count()}")
for i in range(torch.cuda.device_count()):
props = torch.cuda.get_device_properties(i)
print(f"Device {i}: {props.name}")
print(f" Memory: {props.total_memory / 1e9:.1f} GB")
print(f" Compute capability: {props.major}.{props.minor}")
else:
print("❌ CUDA not available")
return False
return True
def test_transformers_device():
"""Test transformers library device detection"""
print_section("Transformers Device Test")
try:
from transformers import pipeline
# Test with CPU
print("Testing CPU pipeline...")
start_time = time.time()
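        # In the transformers pipeline API, device=-1 selects the CPU;
        # device=0 (used below) selects the first CUDA device.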
pipe_cpu = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english", device=-1)
result_cpu = pipe_cpu("This is a test sentence")
cpu_time = time.time() - start_time
print(f"βœ… CPU pipeline loaded in {cpu_time:.2f}s")
print(f"Result: {result_cpu}")
# Test with CUDA if available
if torch.cuda.is_available():
print("\nTesting CUDA pipeline...")
start_time = time.time()
pipe_cuda = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english", device=0)
result_cuda = pipe_cuda("This is a test sentence")
cuda_time = time.time() - start_time
print(f"βœ… CUDA pipeline loaded in {cuda_time:.2f}s")
print(f"Result: {result_cuda}")
speedup = cpu_time / cuda_time if cuda_time > 0 else 0
print(f"\nπŸš€ Speedup: {speedup:.2f}x faster with CUDA")
return True
except Exception as e:
print(f"❌ Error testing transformers: {e}")
return False
def test_whisper_models():
"""Test Whisper model loading with different devices"""
print_section("Whisper Model Test")
try:
from faster_whisper import WhisperModel
# Test CPU model
print("Testing Whisper on CPU...")
start_time = time.time()
model_cpu = WhisperModel("tiny.en", device="cpu", compute_type="int8")
cpu_load_time = time.time() - start_time
print(f"βœ… CPU model loaded in {cpu_load_time:.2f}s")
# Test CUDA model if available
if torch.cuda.is_available():
print("\nTesting Whisper on CUDA...")
start_time = time.time()
try:
model_cuda = WhisperModel("tiny.en", device="cuda", compute_type="float16")
cuda_load_time = time.time() - start_time
print(f"βœ… CUDA model loaded in {cuda_load_time:.2f}s")
speedup = cpu_load_time / cuda_load_time if cuda_load_time > 0 else 0
print(f"πŸš€ Load speedup: {speedup:.2f}x faster with CUDA")
except Exception as e:
print(f"❌ Error loading CUDA model: {e}")
return False
return True
except ImportError:
print("❌ faster-whisper not installed")
return False
except Exception as e:
print(f"❌ Error testing Whisper: {e}")
return False
def test_memory_usage():
"""Test GPU memory usage"""
print_section("GPU Memory Test")
if not torch.cuda.is_available():
print("❌ CUDA not available for memory test")
return False
# Get initial memory
torch.cuda.empty_cache()
initial_memory = torch.cuda.memory_allocated()
total_memory = torch.cuda.get_device_properties(0).total_memory
print(f"Total GPU memory: {total_memory / 1e9:.1f} GB")
print(f"Initial memory usage: {initial_memory / 1e6:.1f} MB")
    # Allocate a small test tensor (1000x1000 float32, about 4 MB) to verify allocation works
try:
test_tensor = torch.randn(1000, 1000, device="cuda")
allocated_memory = torch.cuda.memory_allocated()
print(f"Memory after tensor allocation: {allocated_memory / 1e6:.1f} MB")
print(f"Available memory: {(total_memory - allocated_memory) / 1e9:.1f} GB")
# Clean up
del test_tensor
torch.cuda.empty_cache()
print("βœ… Memory test completed")
return True
except Exception as e:
print(f"❌ Memory test failed: {e}")
return False
def test_environment_config():
"""Test environment configuration"""
print_section("Environment Configuration Test")
# Load .env file if it exists
env_file = os.path.join(os.path.dirname(__file__), '.env')
if os.path.exists(env_file):
load_dotenv(env_file)
print(f"βœ… Found .env file: {env_file}")
else:
print(f"ℹ️ No .env file found at: {env_file}")
print(" Create one from .env.example to configure CUDA usage")
# Check USE_CUDA setting
use_cuda = os.getenv('USE_CUDA', 'false').lower() == 'true'
print(f"USE_CUDA environment variable: {os.getenv('USE_CUDA', 'false')}")
print(f"Parsed USE_CUDA value: {use_cuda}")
# Test config import
try:
sys.path.append(os.path.dirname(__file__))
from config import config
print("βœ… Config module imported successfully")
device_info = config.get_device_info()
print(f"Selected device: {device_info['device']}")
print(f"Compute type: {device_info['compute_type']}")
return True
except Exception as e:
print(f"❌ Error importing config: {e}")
return False
def run_performance_benchmark():
"""Run a simple performance benchmark"""
print_section("Performance Benchmark")
if not torch.cuda.is_available():
print("❌ CUDA not available for benchmark")
return
# Matrix multiplication benchmark
size = 2000
iterations = 5
print(f"Running {iterations} matrix multiplications ({size}x{size})...")
# CPU benchmark
print("\nCPU benchmark:")
cpu_times = []
for i in range(iterations):
a = torch.randn(size, size)
b = torch.randn(size, size)
start_time = time.time()
c = torch.mm(a, b)
cpu_time = time.time() - start_time
cpu_times.append(cpu_time)
print(f" Iteration {i+1}: {cpu_time:.3f}s")
avg_cpu_time = sum(cpu_times) / len(cpu_times)
print(f"Average CPU time: {avg_cpu_time:.3f}s")
# CUDA benchmark
print("\nCUDA benchmark:")
cuda_times = []
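    # Warm-up: the first CUDA matmul pays one-time kernel/cuBLAS initialization
    # costs that would otherwise skew iteration 1, so run one untimed matmul first.
    torch.mm(torch.randn(size, size, device="cuda"), torch.randn(size, size, device="cuda"))
    torch.cuda.synchronize()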
for i in range(iterations):
a = torch.randn(size, size, device="cuda")
b = torch.randn(size, size, device="cuda")
torch.cuda.synchronize() # Wait for GPU
start_time = time.time()
c = torch.mm(a, b)
torch.cuda.synchronize() # Wait for GPU
cuda_time = time.time() - start_time
cuda_times.append(cuda_time)
print(f" Iteration {i+1}: {cuda_time:.3f}s")
avg_cuda_time = sum(cuda_times) / len(cuda_times)
print(f"Average CUDA time: {avg_cuda_time:.3f}s")
    speedup = avg_cpu_time / avg_cuda_time if avg_cuda_time > 0 else 0
    print(f"\n🚀 Overall speedup: {speedup:.2f}x faster with CUDA")
def main():
"""Main test function"""
print_header("CUDA Configuration Test for Speech Transcription App")
print("This script will test your CUDA setup and help you configure")
print("the speech transcription app for optimal performance.")
# Run tests
tests_passed = 0
total_tests = 5
if test_pytorch_cuda():
tests_passed += 1
if test_transformers_device():
tests_passed += 1
if test_whisper_models():
tests_passed += 1
if test_memory_usage():
tests_passed += 1
if test_environment_config():
tests_passed += 1
# Performance benchmark (optional)
if torch.cuda.is_available():
try:
run_performance_benchmark()
except Exception as e:
print(f"❌ Benchmark failed: {e}")
# Summary
print_header("Test Summary")
print(f"Tests passed: {tests_passed}/{total_tests}")
if tests_passed == total_tests and torch.cuda.is_available():
print("πŸŽ‰ All tests passed! Your CUDA setup is working correctly.")
print("\nTo enable CUDA acceleration:")
print("1. Create a .env file (copy from .env.example)")
print("2. Set USE_CUDA=true in the .env file")
print("3. Run the speech transcription app")
elif torch.cuda.is_available():
print("⚠️ Some tests failed. Check the error messages above.")
print("You may still be able to use CUDA, but with potential issues.")
else:
print("ℹ️ CUDA not available. The app will run on CPU.")
print("This is perfectly fine for most use cases!")
print("\nFor CPU usage (always works):")
print("1. Create a .env file (copy from .env.example)")
print("2. Set USE_CUDA=false in the .env file")
print("3. Run the speech transcription app")
if __name__ == "__main__":
main()