#!/bin/bash
#
# HAT benchmark reproducibility suite.
#
# Builds the project, runs the core Rust benchmark tests, the Python binding
# tests and the end-to-end demo, and appends all output to a timestamped
# results file under <script dir>/results/.
#
# Usage: <this script> [--quick]
#
# NOTE(review): --quick only sets QUICK_MODE, which is echoed and written to
# the results header; nothing visible in this script forwards it to the
# benchmarks -- confirm whether they pick it up some other way.

set -e

# All paths are derived from the location of this script; the project root is
# taken to be the script directory's parent.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "${SCRIPT_DIR}")"
RESULTS_DIR="${SCRIPT_DIR}/results"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
RESULTS_FILE="${RESULTS_DIR}/benchmark_results_${TIMESTAMP}.txt"

# ANSI colour escapes, interpreted by `echo -e` where they are used.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Only the first positional argument is inspected.
QUICK_MODE=false
case "$1" in
    --quick)
        QUICK_MODE=true
        echo -e "${YELLOW}Running in quick mode (abbreviated benchmarks)${NC}"
        ;;
esac

# The results directory must exist before the results file is created below.
mkdir -p "${RESULTS_DIR}"

# Console banner.
cat << EOF
========================================================================
 HAT Benchmark Reproducibility Suite
 $(date)
========================================================================

Project directory: $PROJECT_DIR
Results will be saved to: $RESULTS_FILE

EOF

# Start (truncate) the results file with a header describing this run.
{
    echo "HAT Benchmark Results"
    echo "====================="
    echo "Date: $(date)"
    echo "Host: $(hostname)"
    echo "Rust: $(rustc --version)"
    echo "Quick mode: $QUICK_MODE"
    echo ""
} > "${RESULTS_FILE}"

# Every later cargo/maturin invocation runs from the project root.
cd "${PROJECT_DIR}"
#######################################
# Run one cargo integration-test target as a benchmark and log its output.
#
# The combined stdout/stderr of `cargo test` is shown on the console and
# appended to the results file under a "=== <name> ===" heading. A failing
# run is reported but does not stop the script.
#
# Globals:   RESULTS_FILE (appended to); BLUE, GREEN, RED, NC (read)
# Arguments: $1 - display name used in console and results-file headings
#            $2 - cargo test target, passed to `cargo test --test`
# Outputs:   progress, test output and PASSED/FAILED verdict on stdout
# Returns:   0 whether the benchmark passes or fails
#######################################
run_benchmark() {
    local name="$1"
    local test_name="$2"

    echo -e "${BLUE}[$name]${NC} Running..."
    {
        echo ""
        echo "=== $name ==="
        echo ""
    } >> "$RESULTS_FILE"

    # Without pipefail the pipeline's status is tee's, so a failing cargo run
    # would be reported as PASSED. pipefail is enabled inside a subshell so
    # the setting does not leak into the rest of the script.
    if (
        set -o pipefail
        cargo test --test "$test_name" -- --nocapture 2>&1 | tee -a "$RESULTS_FILE"
    ); then
        echo -e "${GREEN}[$name]${NC} PASSED"
    else
        echo -e "${RED}[$name]${NC} FAILED"
        echo "FAILED" >> "$RESULTS_FILE"
    fi
    echo ""
}
echo "========================================================================"
echo " Phase 1: Building Project"
echo "========================================================================"

# Only the last few lines of each build log are shown. Piping into tail would
# normally hide cargo's exit status (the pipeline reports tail's), letting the
# script carry on after a broken build; pipefail, scoped to a subshell, makes
# a build failure visible so the run can stop here.
echo "Building release version..."
if ! (set -o pipefail; cargo build --release 2>&1 | tail -5); then
    echo -e "${RED}Release build failed; aborting.${NC}" >&2
    exit 1
fi

echo "Building test suite..."
if ! (set -o pipefail; cargo build --tests 2>&1 | tail -5); then
    echo -e "${RED}Test suite build failed; aborting.${NC}" >&2
    exit 1
fi
echo ""
echo "========================================================================"
echo " Phase 2: Running Core Benchmarks"
echo "========================================================================"

# One entry per benchmark: "<section heading>|<display name>|<cargo test target>".
# Entries run in the order listed.
core_benchmarks=(
    "Phase 3.1: HAT vs HNSW Comparative Benchmark|HAT vs HNSW|phase31_hat_vs_hnsw"
    "Phase 3.2: Real Embedding Dimensions|Real Embeddings|phase32_real_embeddings"
    "Phase 3.3: Persistence Layer|Persistence|phase33_persistence"
    "Phase 4.2: Attention State Format|Attention State|phase42_attention_state"
)

for bench_entry in "${core_benchmarks[@]}"; do
    # Split the entry on '|' into its three fields.
    IFS='|' read -r bench_heading bench_label bench_target <<< "$bench_entry"
    echo ""
    echo "--- $bench_heading ---"
    run_benchmark "$bench_label" "$bench_target"
done
echo ""
echo "========================================================================"
echo " Phase 3: Python Integration Tests"
echo "========================================================================"

# The virtual environment is kept between runs so dependencies are installed
# only once.
# NOTE(review): this is a fixed, predictable path under /tmp whose activate
# script gets sourced; on a shared machine consider a per-user location.
VENV_DIR="/tmp/arms-hat-bench-venv"

# Check for the activate script rather than just the directory: an interrupted
# earlier run can leave the directory behind without a usable environment,
# which would make the `source` below fail.
if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
    echo "Creating Python virtual environment..."
    python3 -m venv "$VENV_DIR"
fi

source "$VENV_DIR/bin/activate"

# Installation stays best-effort (packages may already be present in a reused
# environment), but a failure is no longer completely silent.
echo "Installing Python dependencies..."
if ! pip install -q maturin pytest 2>/dev/null; then
    echo -e "${YELLOW}Warning: could not install maturin/pytest; using what is already installed${NC}" >&2
fi

# pipefail (scoped to a subshell) exposes maturin's exit status, which the
# pipe into tail would otherwise hide. Stop here on failure instead of testing
# a missing or stale extension.
echo "Building Python extension..."
if ! (set -o pipefail; maturin develop --features python 2>&1 | tail -3); then
    echo -e "${RED}Python extension build failed; aborting.${NC}" >&2
    exit 1
fi

echo ""
echo "--- Python Binding Tests ---"
{
    echo ""
    echo "=== Python Binding Tests ==="
    echo ""
} >> "$RESULTS_FILE"

# As above, pipefail makes the verdict reflect pytest's status, not tee's.
if (
    set -o pipefail
    python -m pytest "$PROJECT_DIR/python/tests/" -v 2>&1 | tee -a "$RESULTS_FILE"
); then
    echo -e "${GREEN}[Python Tests]${NC} PASSED"
else
    echo -e "${RED}[Python Tests]${NC} FAILED"
    # Record the failure in the results file, as run_benchmark does.
    echo "FAILED" >> "$RESULTS_FILE"
fi
echo ""
echo "========================================================================"
echo " Phase 4: End-to-End Demo"
echo "========================================================================"

{
    echo ""
    echo "=== End-to-End Demo ==="
    echo ""
} >> "$RESULTS_FILE"

# The demo is invoked the same way whether or not sentence-transformers is
# installed; only the announcement differs. This script passes no flag, so any
# pseudo-embedding fallback is presumably decided by the demo itself.
if pip show sentence-transformers >/dev/null 2>&1; then
    echo "Running end-to-end demo with real embeddings..."
else
    echo "Installing sentence-transformers for full demo..."
    # Best-effort: the demo is still run if the install fails.
    pip install -q sentence-transformers 2>/dev/null || true

    if ! pip show sentence-transformers >/dev/null 2>&1; then
        echo "Running demo with pseudo-embeddings (sentence-transformers not available)..."
    fi
fi

# pipefail (scoped to a subshell) exposes the demo's exit status, which the
# pipe into tee would otherwise hide. A failure is reported but does not
# abort, so the summary is still produced.
if ! (
    set -o pipefail
    python "$PROJECT_DIR/examples/demo_hat_memory.py" 2>&1 | tee -a "$RESULTS_FILE"
); then
    echo -e "${RED}[End-to-End Demo]${NC} FAILED"
    echo "FAILED" >> "$RESULTS_FILE"
fi

# Leave the virtual environment; `deactivate` comes from the sourced activate
# script.
deactivate
#######################################
# Count the lines of a file that match a basic regular expression.
#
# `grep -c` prints "0" AND exits non-zero when nothing matches, so the usual
# `$(grep -c ... || echo 0)` idiom yields two lines ("0" twice). This helper
# always prints exactly one number.
#
# Arguments: $1 - grep pattern
#            $2 - file to search
# Outputs:   the match count on stdout; 0 if the file cannot be read
# Returns:   0
#######################################
count_matching_lines() {
    local pattern="$1"
    local file="$2"
    local count

    count=$(grep -c -- "$pattern" "$file" 2>/dev/null) || true
    echo "${count:-0}"
}

echo ""
echo "========================================================================"
echo " Summary"
echo "========================================================================"

{
    echo ""
    echo "=== Summary ==="
    echo ""
} >> "$RESULTS_FILE"

# Both counts are rough (hence the "~" in the output): they are plain pattern
# matches over the whole results file, so e.g. the first pattern would also
# match a "test result: ok." line if one appears.
RUST_PASSED=$(count_matching_lines "test .* ok" "$RESULTS_FILE")
PYTHON_PASSED=$(count_matching_lines "PASSED" "$RESULTS_FILE")

echo "Results saved to: $RESULTS_FILE"
echo ""
echo "Key Results:"
echo " - Rust tests passed: ~$RUST_PASSED"
echo " - Python tests passed: ~$PYTHON_PASSED"
echo ""

# Highlight headline figures when the corresponding lines are in the log.
if grep -q "HAT enables 100% recall" "$RESULTS_FILE"; then
    echo -e "${GREEN}Core claim validated: 100% recall achieved${NC}"
fi

# Take the figure from the last latency line. `|| true` keeps `set -e` from
# aborting the summary when that line has no "<digits>.<digits>ms" token.
LATENCY=$(grep "Average retrieval latency" "$RESULTS_FILE" \
    | tail -1 \
    | grep -oE '[0-9]+\.[0-9]+ms' || true)
if [[ -n "$LATENCY" ]]; then
    echo " - Retrieval latency: $LATENCY"
fi

echo ""
echo "========================================================================"
echo " Benchmark Complete"
echo "========================================================================"
echo ""
echo "Full results: $RESULTS_FILE"
echo ""