Gül Sena Altıntaş committed
Commit · 889a42a
1 Parent(s): 1d3a5fe

Improved serving script

Added gemma supertoken model.
Small bug persists with reading HF_TOKEN.

- app.py +6 -2
- serve_on_killarney.sh +62 -31
app.py
CHANGED
@@ -9,11 +9,13 @@ import re
 import logging
 from typing import List, Dict, Any
 import gc
+import os
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+print("hf_toke_fromglobal", os.environ.get("HF_TOKEN"))
 # Model configurations - maps display names to HF model paths
 PREDEFINED_MODELS = [
     "meta-llama/Llama-3.2-1B",
@@ -25,8 +27,8 @@ PREDEFINED_MODELS = [
     "CohereForAI/aya-expanse-8b",
     "common-pile/comma-v0.1-2t",
     "google/byt5-small",
-    "google/byt5-small",
     "gsaltintas/supertoken_models-llama_gpt2",
+    "gsaltintas/supertoken_models-llama_google-gemma-2-2b"
 ]
 # Global cache for loaded models
 model_cache = {}
@@ -104,10 +106,10 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
     if progress_callback:
         progress_callback(0.1, f"🔄 Starting to load model: {model_path}")
 
-    logger.info(f"Loading model: {model_path}")
 
     # Check if CUDA is available
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    logger.info(f"Loading model: {model_path} using device: {device}")
 
     if progress_callback:
         progress_callback(0.2, f"📥 Loading tokenizer for {model_path}...")
@@ -122,6 +124,8 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
     if progress_callback:
         progress_callback(0.5, f"🧠 Loading model weights for {model_path}... (this may take a while)")
 
+    logger.info(os.getcwd())
+    logger.info("hf token", os.environ.get("HF_TOKEN"))
     # Load model with appropriate settings
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
serve_on_killarney.sh
CHANGED
@@ -5,21 +5,24 @@ CLUSTER_HOST="killarney"
 CLUSTER_USER="gsa"
 
 # Job configuration
+ACCOUNT="aip-craffel"
 SCRIPT_NAME="gradio_job.slurm"
-
+APP_DIR="/project/$ACCOUNT/$CLUSTER_USER/quick-tokenizer-accuracy"
+APP_PATH="app.py"
 JOB_NAME="gradio-app"
-
+GPU_TYPE="l40s"
+NUM_GPUS=1
 NODES=1
 NTASKS_PER_NODE=1
 CPUS_PER_TASK=4
 MEM="8G"
 TIME="02:00:00"
-GRADIO_PORT=
-
-script_location="
+GRADIO_PORT=7861
+LOCAL_PORT=7861
+script_location="$APP_DIR/$SCRIPT_NAME"
 
-ENV_PATH="/home/
-
+ENV_PATH="/home/$CLUSTER_USER/tokenizers/.venv/bin/activate"
+OUTPUT_DIR="/project/$ACCOUNT/$CLUSTER_USER/.slurm"
 
 # Function to cleanup temporary files
 cleanup() {
@@ -37,14 +40,14 @@ trap cleanup EXIT INT TERM
 cat > "$SCRIPT_NAME" << EOF
 #!/bin/bash
 #SBATCH --job-name=$JOB_NAME
-#SBATCH --
+#SBATCH --gres=gpu:$GPU_TYPE:$NUM_GPUS
 #SBATCH --nodes=$NODES
 #SBATCH --ntasks-per-node=$NTASKS_PER_NODE
 #SBATCH --cpus-per-task=$CPUS_PER_TASK
 #SBATCH --mem=$MEM
 #SBATCH --time=$TIME
 #SBATCH --account=$ACCOUNT
-#SBATCH --output=$
+#SBATCH --output=$OUTPUT_DIR/%j.out
 
 # Print job info
 echo "Job started on node: \$(hostname)"
@@ -57,15 +60,15 @@ echo "Starting time: \$(date)"
 module load slurm/killarney/24.05.7 StdEnv/2023 gcc/13.3 openmpi/5.0.3 cuda/12.6 python/3.10.13
 
 # Activate virtual environment
-source $ENV_PATH
+source "${ENV_PATH}"
 
 # Set up environment
 export GRADIO_SERVER_NAME="0.0.0.0"
 export GRADIO_SERVER_PORT=$GRADIO_PORT
 
 # Start Gradio app
-echo "Starting Gradio app on port $GRADIO_PORT..."
-
+echo "Starting Gradio app on port ${GRADIO_PORT}..."
+gradio "${APP_PATH}" --watch-dirs "${APP_DIR}"
 
 # Keep the job alive
 echo "Gradio app finished at: \$(date)"
@@ -81,7 +84,7 @@ if [ $? -ne 0 ]; then
 fi
 
 echo "Submitting job to cluster..."
-JOB_ID=$(ssh -t "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'cd
+JOB_ID=$(ssh -t "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'cd $APP_DIR && sbatch --parsable $script_location'")
 
 if [ $? -ne 0 ]; then
     echo "Error: Failed to submit job to cluster"
@@ -122,7 +125,7 @@ done
 
 # Get the allocated node
 NODE=$(ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'squeue -j $JOB_ID -h -o \"%N\"'")
-echo "Job is running on node: $NODE"
+echo "Job (${JOB_ID}) is running on node: ${NODE}"
 
 # Wait a moment for the Gradio app to start
 echo "Waiting for Gradio app to initialize..."
@@ -147,10 +150,50 @@ if [ -n "$GRADIO_CHECK" ]; then
 else
     echo "⚠ Warning: Gradio app may not have started properly"
     echo "Check the job output:"
-    ssh "$CLUSTER_USER@$CLUSTER_HOST" \
-        "bash -l -c 'tail ${JOB_ID}.out'"
+    ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'tail \"${OUTPUT_DIR}/${JOB_ID}.out\"'"
 fi
 
+
+cancel_job() {
+    read -p "Would you like to cancel the job? (y/n): " -n 1 -r
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        ## job id known only remotely
+        # ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel \${JOB_ID}'"
+        ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel ${JOB_ID} '"
+        # ssh "$CLUSTER_USER@$CLUSTER_HOST" "bash -l -c 'scancel ${JOB_ID}'"
+    fi
+}
+
+# Optional port forwarding
+read -p "Would you like to set up port forwarding now? (y/n): " -n 1 -r
+echo ""
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+    # ssh -L "${GRADIO_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
+    #     -t "echo 'Port forwarding active: localhost:${GRADIO_PORT} -> ${NODE}:${GRADIO_PORT}'; bash
+    # If GRADIO_PORT is in use locally, pick a random free port
+    if lsof -iTCP:"$GRADIO_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
+        echo "Port $GRADIO_PORT is already in use locally — selecting a free one..."
+        LOCAL_PORT=$(comm -23 \
+            <(seq 1024 65535 | sort) \
+            <(lsof -nP -iTCP -sTCP:LISTEN | awk 'NR>1 {print $9}' | awk -F: '{print $NF}' | sort -u) \
+            | awk 'BEGIN{srand()} {ports[NR]=$0} END{print ports[int(rand()*NR)+1]}')
+    else
+        LOCAL_PORT="$GRADIO_PORT"
+    fi
+
+    echo "Using local port: $LOCAL_PORT"
+
+    echo "Setting up port forwarding... Open https://localhost:${LOCAL_PORT} in your browser to access the app."
+    ssh -L "${LOCAL_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
+        -t "echo 'Port forwarding active: localhost:${LOCAL_PORT} -> ${NODE}:${GRADIO_PORT}'; bash"
+
+
+    echo ""
+    echo "Port forwarding ended."
+    cancel_job
+else
+    echo "Skipping port forwarding."
+
 # Connection info
 cat <<EOF
 
@@ -168,29 +211,17 @@ Alternative direct SSH with forwarding:
 ssh -L $GRADIO_PORT:localhost:$GRADIO_PORT $CLUSTER_USER@$NODE.$CLUSTER_HOST
 
 Check job status:
-ssh $CLUSTER_USER@$CLUSTER_HOST 'squeue -j $JOB_ID '
+ssh $CLUSTER_USER@$CLUSTER_HOST \"'squeue -j $JOB_ID '\"
 
 Cancel job:
-ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID '
+ssh $CLUSTER_USER@$CLUSTER_HOST \"'scancel $JOB_ID '\"
 =========================================
 
 EOF
-
-# Optional port forwarding
-read -p "Would you like to set up port forwarding now? (y/n): " -n 1 -r
-echo ""
-if [[ $REPLY =~ ^[Yy]$ ]]; then
-    echo "Setting up port forwarding..."
-    ssh -L "${GRADIO_PORT}:${NODE}:${GRADIO_PORT}" "$CLUSTER_USER@$CLUSTER_HOST" \
-        -t "echo 'Port forwarding active: localhost:${GRADIO_PORT} -> ${NODE}:${GRADIO_PORT}'; bash"
-    echo ""
-    echo "Port forwarding ended."
-else
-    echo "Skipping port forwarding."
     echo "Later you can run: ssh -L $GRADIO_PORT:$NODE:$GRADIO_PORT $CLUSTER_USER@$CLUSTER_HOST"
 fi
 
 echo ""
 echo "Job $JOB_ID is still running on $CLUSTER_HOST:$NODE"
-echo "Don't forget to cancel it when done: ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID'"
+# echo "Don't forget to cancel it when done: ssh $CLUSTER_USER@$CLUSTER_HOST 'scancel $JOB_ID'"
 
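A side note on the port selection above: the `lsof`/`seq`/`comm` pipeline enumerates every listening port and picks an unused one at random. An alternative sketch, not what the script currently does, is to let the OS assign a free port by binding to port 0; it is shown in Python since app.py already ships with the Space, and the same trick can be called from the shell via `python3 -c`.

import socket


def pick_free_port() -> int:
    """Ask the kernel for an ephemeral local port that is currently unused."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))  # port 0 -> the OS picks a free port
        return s.getsockname()[1]


if __name__ == "__main__":
    # e.g. in the script: LOCAL_PORT=$(python3 pick_port.py)
    print(pick_free_port())

The trade-off is a small race window between choosing the port and opening the ssh tunnel, which the lsof-based approach shares.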