HAT / benchmarks /run_all_benchmarks.sh

Andrew Young

Upload folder using huggingface_hub

8ef2d83 verified about 2 months ago

6.54 kB

	#!/bin/bash
	#
	# HAT Benchmark Reproducibility Suite
	# ===================================
	#
	# This script runs all benchmarks from the HAT paper and generates
	# a comprehensive results report.
	#
	# Usage:
	# ./run_all_benchmarks.sh [--quick]
	#
	# Options:
	# --quick Run abbreviated benchmarks (faster, less thorough)
	#
	# Requirements:
	# - Rust toolchain (cargo)
	# - Python 3.8+ with venv
	# - ~2GB free disk space
	# - ~10 minutes for full suite, ~2 minutes for quick

	# Stop at the first command that exits non-zero.
	set -e

	# ANSI colour sequences. They are stored as literal backslash text and only
	# become escape codes when printed through `echo -e`.
	RED='\033[0;31m'
	GREEN='\033[0;32m'
	YELLOW='\033[1;33m'
	BLUE='\033[0;34m'
	NC='\033[0m' # resets colour

	# Paths, all derived from the directory this script lives in:
	#   SCRIPT_DIR   - absolute directory containing this script
	#   PROJECT_DIR  - its parent directory
	#   RESULTS_DIR  - "results" folder beside the script
	#   RESULTS_FILE - one timestamped report file per run
	SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
	PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
	RESULTS_DIR="$SCRIPT_DIR/results"
	TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
	RESULTS_FILE="$RESULTS_DIR/benchmark_results_$TIMESTAMP.txt"

	# Command-line handling: only the first argument is inspected, and anything
	# other than --quick is ignored.
	# NOTE(review): QUICK_MODE is not exported or passed to any command in this
	# script; it is only written into the results header. Confirm whether the
	# benchmarks are supposed to read it.
	QUICK_MODE=false
	case "$1" in
	  --quick)
	    QUICK_MODE=true
	    echo -e "${YELLOW}Running in quick mode (abbreviated benchmarks)${NC}"
	    ;;
	esac

	# Create results directory
	mkdir -p "$RESULTS_DIR"

	echo "========================================================================"
	echo " HAT Benchmark Reproducibility Suite"
	echo " $(date)"
	echo "========================================================================"
	echo ""
	echo "Project directory: $PROJECT_DIR"
	echo "Results will be saved to: $RESULTS_FILE"
	echo ""

	# Initialize results file with a header describing this run.
	# `<<-` is used so the terminator (and the header lines) may be indented
	# with tabs; a plain `<<` only ends on an EOF at column 0.
	# Missing tools leave a readable placeholder, not an empty field.
	cat > "$RESULTS_FILE" <<-EOF
	HAT Benchmark Results
	=====================
	Date: $(date)
	Host: $(hostname 2>/dev/null || uname -n)
	Rust: $(rustc --version 2>/dev/null || echo "rustc not found")
	Quick mode: $QUICK_MODE

	EOF

	# All later cargo/maturin commands are run from the project root.
	cd "$PROJECT_DIR" || {
	  echo "Cannot change to project directory: $PROJECT_DIR" >&2
	  exit 1
	}

	#######################################
	# Run one cargo integration test and record its output.
	# Globals:
	#   RESULTS_FILE (appended to), BLUE/GREEN/RED/NC (read)
	# Arguments:
	#   $1 - human-readable benchmark name used in headings
	#   $2 - cargo test target passed to `cargo test --test`
	# Outputs:
	#   Streams the test output to stdout and appends it to RESULTS_FILE;
	#   on failure also appends a "FAILED" line to RESULTS_FILE.
	# Returns:
	#   Always 0, so one failing benchmark does not stop the suite.
	#######################################
	run_benchmark() {
	  local name="$1"
	  local test_name="$2"

	  echo -e "${BLUE}[$name]${NC} Running..."
	  {
	    echo ""
	    echo "=== $name ==="
	    echo ""
	  } >> "$RESULTS_FILE"

	  # A pipeline normally reports the status of its last stage (tee), which
	  # would make every benchmark look like a pass. pipefail is enabled in a
	  # subshell so the status reflects cargo without changing the caller's
	  # shell options.
	  if (
	    set -o pipefail
	    cargo test --test "$test_name" -- --nocapture 2>&1 | tee -a "$RESULTS_FILE"
	  ); then
	    echo -e "${GREEN}[$name]${NC} PASSED"
	  else
	    echo -e "${RED}[$name]${NC} FAILED"
	    echo "FAILED" >> "$RESULTS_FILE"
	  fi
	  echo ""
	}

	echo "========================================================================"
	echo " Phase 1: Building Project"
	echo "========================================================================"

	# Only the last lines of each build log are shown. pipefail is enabled in a
	# subshell so the status is cargo's, not tail's; otherwise a broken build
	# would slip past `set -e` and every later phase would run against it.
	echo "Building release version..."
	if ! ( set -o pipefail; cargo build --release 2>&1 | tail -5 ); then
	  echo -e "${RED}Release build failed; aborting.${NC}" >&2
	  exit 1
	fi

	echo "Building test suite..."
	if ! ( set -o pipefail; cargo build --tests 2>&1 | tail -5 ); then
	  echo -e "${RED}Test-suite build failed; aborting.${NC}" >&2
	  exit 1
	fi

	printf '%s\n' \
	  "" \
	  "========================================================================" \
	  " Phase 2: Running Core Benchmarks" \
	  "========================================================================"

	# Print a blank line and a "--- heading ---" banner, then hand the
	# benchmark over to run_benchmark.
	#   $1 - heading text shown between the dashes
	#   $2 - benchmark display name
	#   $3 - cargo test target
	announce_and_run() {
	  printf '\n--- %s ---\n' "$1"
	  run_benchmark "$2" "$3"
	}

	# Same four benchmarks, in the same order.
	announce_and_run "Phase 3.1: HAT vs HNSW Comparative Benchmark" \
	  "HAT vs HNSW" "phase31_hat_vs_hnsw"
	announce_and_run "Phase 3.2: Real Embedding Dimensions" \
	  "Real Embeddings" "phase32_real_embeddings"
	announce_and_run "Phase 3.3: Persistence Layer" \
	  "Persistence" "phase33_persistence"
	announce_and_run "Phase 4.2: Attention State Format" \
	  "Attention State" "phase42_attention_state"

	echo ""
	echo "========================================================================"
	echo " Phase 3: Python Integration Tests"
	echo "========================================================================"

	# Virtual environment used for the Python bindings; it is kept between runs
	# so later runs skip the creation step.
	# NOTE(review): this is a fixed, predictable path under /tmp - consider a
	# per-user cache directory if the machine is shared.
	VENV_DIR="/tmp/arms-hat-bench-venv"

	# Test for the activate script, not just the directory, so a
	# half-created venv left by an interrupted run is rebuilt.
	if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
	  echo "Creating Python virtual environment..."
	  python3 -m venv "$VENV_DIR"
	fi

	source "$VENV_DIR/bin/activate"

	# Install dependencies. This stays best-effort (the venv may already hold
	# them, e.g. when offline), but a failure is now reported, not hidden.
	echo "Installing Python dependencies..."
	if ! pip install -q maturin pytest; then
	  echo -e "${YELLOW}Warning: could not install maturin/pytest; using whatever the venv already has${NC}" >&2
	fi

	# Build Python extension. The status is maturin's, not tail's. A failure
	# does not abort the script so the Rust results gathered so far still get
	# a summary; the Python tests below will then fail and be recorded.
	echo "Building Python extension..."
	if ! ( set -o pipefail; maturin develop --features python 2>&1 | tail -3 ); then
	  echo -e "${RED}Warning: building the Python extension failed${NC}" >&2
	fi

	# Run Python tests
	echo ""
	echo "--- Python Binding Tests ---"
	echo "" >> "$RESULTS_FILE"
	echo "=== Python Binding Tests ===" >> "$RESULTS_FILE"
	echo "" >> "$RESULTS_FILE"

	# pipefail in a subshell makes the branch depend on pytest's exit status;
	# without it tee's status would always select the PASSED branch.
	if (
	  set -o pipefail
	  python -m pytest "$PROJECT_DIR/python/tests/" -v 2>&1 | tee -a "$RESULTS_FILE"
	); then
	  echo -e "${GREEN}[Python Tests]${NC} PASSED"
	else
	  echo -e "${RED}[Python Tests]${NC} FAILED"
	  echo "FAILED" >> "$RESULTS_FILE"
	fi

	echo ""
	echo "========================================================================"
	echo " Phase 4: End-to-End Demo"
	echo "========================================================================"

	echo "" >> "$RESULTS_FILE"
	echo "=== End-to-End Demo ===" >> "$RESULTS_FILE"
	echo "" >> "$RESULTS_FILE"

	# Try to make sentence-transformers available. The install is deliberately
	# best-effort: the demo is run either way, so a failed install is not fatal.
	if ! pip show sentence-transformers >/dev/null 2>&1; then
	  echo "Installing sentence-transformers for full demo..."
	  pip install -q sentence-transformers 2>/dev/null || true
	fi

	# The demo command is the same in both cases; only the announcement differs.
	if pip show sentence-transformers >/dev/null 2>&1; then
	  echo "Running end-to-end demo with real embeddings..."
	else
	  echo "Running demo with pseudo-embeddings (sentence-transformers not available)..."
	fi

	# pipefail in a subshell so the branch reflects the demo's exit status
	# and not tee's; the script continues to the summary either way.
	if (
	  set -o pipefail
	  python "$PROJECT_DIR/examples/demo_hat_memory.py" 2>&1 | tee -a "$RESULTS_FILE"
	); then
	  echo -e "${GREEN}[End-to-End Demo]${NC} PASSED"
	else
	  echo -e "${RED}[End-to-End Demo]${NC} FAILED"
	  echo "FAILED" >> "$RESULTS_FILE"
	fi

	# Leave the virtual environment that was activated for the Python phases.
	deactivate

	echo ""
	echo "========================================================================"
	echo " Summary"
	echo "========================================================================"

	#######################################
	# Count the lines of a file that match a grep pattern.
	# Arguments: $1 - basic regular expression, $2 - file to search
	# Outputs:   a single integer on stdout; 0 when nothing matches or the
	#            file cannot be read
	#######################################
	_count_matches() {
	  local count
	  # grep -c already prints "0" (and exits 1) when nothing matches, so the
	  # non-zero status is ignored here; appending a fallback `echo 0` would
	  # produce two lines of output.
	  count=$(grep -c -- "$1" "$2" 2>/dev/null) || true
	  echo "${count:-0}"
	}

	#######################################
	# Print a summary line and also append it to RESULTS_FILE, so the
	# "=== Summary ===" section of the report is not left empty.
	# Arguments: $1 - text of the line
	#######################################
	_summary_line() {
	  echo "$1"
	  echo "$1" >> "$RESULTS_FILE"
	}

	# Extract key metrics from results
	echo "" >> "$RESULTS_FILE"
	echo "=== Summary ===" >> "$RESULTS_FILE"
	echo "" >> "$RESULTS_FILE"

	# Count passed tests. Both figures are approximate (hence the "~"): they
	# are plain text matches over everything captured in the results file.
	RUST_PASSED=$(_count_matches "test .* ok" "$RESULTS_FILE")
	PYTHON_PASSED=$(_count_matches "PASSED" "$RESULTS_FILE")

	echo "Results saved to: $RESULTS_FILE"
	echo ""
	_summary_line "Key Results:"
	_summary_line " - Rust tests passed: ~$RUST_PASSED"
	_summary_line " - Python tests passed: ~$PYTHON_PASSED"
	echo ""

	# Extract recall metrics if available
	if grep -q "HAT enables 100% recall" "$RESULTS_FILE"; then
	  echo -e "${GREEN}Core claim validated: 100% recall achieved${NC}"
	  echo "Core claim validated: 100% recall achieved" >> "$RESULTS_FILE"
	fi

	# Take the number from the last reported latency line. `|| true` keeps a
	# non-matching line from aborting the script under `set -e`; the decimal
	# part is optional so values such as "12ms" are accepted too.
	LATENCY=$(grep "Average retrieval latency" "$RESULTS_FILE" 2>/dev/null \
	  | tail -1 \
	  | grep -oE '[0-9]+(\.[0-9]+)?ms' \
	  | head -n 1 || true)
	if [[ -n "$LATENCY" ]]; then
	  _summary_line " - Retrieval latency: $LATENCY"
	fi

	echo ""
	echo "========================================================================"
	echo " Benchmark Complete"
	echo "========================================================================"
	echo ""
	echo "Full results: $RESULTS_FILE"
	echo ""