#!/bin/bash
set -e
echo '=== STARRY HF Space Entrypoint ==='
# ── Start PostgreSQL ──
echo 'Starting PostgreSQL...'
/usr/lib/postgresql/15/bin/pg_ctl -D "$PGDATA" -l /home/node/postgresql.log start -o "-p 5432"
for i in $(seq 1 15); do
  if /usr/lib/postgresql/15/bin/pg_isready -h 127.0.0.1 -p 5432 -q 2>/dev/null; then
    echo 'PostgreSQL is ready.'
    break
  fi
  echo " Waiting for PostgreSQL... attempt $i/15"
  sleep 1
done
# Create database
/usr/lib/postgresql/15/bin/psql -h 127.0.0.1 -p 5432 -d postgres -tc "SELECT 1 FROM pg_database WHERE datname='starry_omr'" | grep -q 1 || \
  /usr/lib/postgresql/15/bin/createdb -h 127.0.0.1 -p 5432 starry_omr
# ── Model path resolution from models.yaml ──
MODELS_BASE="/home/node/app/models"
MODELS_YAML="$MODELS_BASE/starry-dist/models.yaml"
HF_REPO="k-l-lambda/starry"
CURL_PROXY=""
if [ -n "$https_proxy" ]; then
CURL_PROXY="-x $https_proxy"
fi
# Download models.yaml from HuggingFace if not present locally
if [ ! -f "$MODELS_YAML" ] && [ -n "$HF_TOKEN" ]; then
echo "Downloading models.yaml from HuggingFace..."
mkdir -p "$MODELS_BASE/starry-dist"
curl -sfL $CURL_PROXY \
-H "Authorization: Bearer $HF_TOKEN" \
-o "$MODELS_YAML" \
"https://huggingface.co/${HF_REPO}/resolve/main/starry-dist/models.yaml?download=true" \
|| echo "Warning: Failed to download models.yaml (token may lack access)"
fi
# Parse models.yaml and set env vars (only if not already set)
if [ -f "$MODELS_YAML" ]; then
echo "Parsing model paths from models.yaml..."
# Helper: extract a field value under a section
yaml_get() {
sed -n "/^${1}:/,/^[a-z]/{ /^ ${2}:/{ s/.*${2}: *//; p; } }" "$MODELS_YAML" | head -1
}
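  # Sketch of the models.yaml layout this parser assumes (the section names come
  # from the calls below; the paths/values here are hypothetical examples, not the
  # actual contents of the k-l-lambda/starry repo):
  #   bdtopo:
  #    path: bdtopo/20230101
  #    default: model.onnx
  #    files:
  #     - model.onnx
  #   layout:
  #    path: layout/20230101
  # With that shape, `yaml_get bdtopo path` prints "bdtopo/20230101"; note that the
  # sed pattern matches exactly one leading space before the field name.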
  # bdtopo (ONNX model for regulation)
  if [ -z "$BDTOPO_MODEL_PATH" ]; then
    _bdtopo_path=$(yaml_get bdtopo path)
    _bdtopo_default=$(yaml_get bdtopo default)
    if [ -n "$_bdtopo_path" ] && [ -n "$_bdtopo_default" ]; then
      export BDTOPO_MODEL_PATH="$MODELS_BASE/starry-dist/$_bdtopo_path/$_bdtopo_default"
      echo " BDTOPO_MODEL_PATH=$BDTOPO_MODEL_PATH"
    fi
  fi
  # PyTorch predictors (starry-dist)
  if [ -z "$LAYOUT_MODEL_PATH" ]; then
    export LAYOUT_MODEL_PATH=$(yaml_get layout path)
    echo " LAYOUT_MODEL_PATH=$LAYOUT_MODEL_PATH"
  fi
  if [ -z "$MASK_MODEL_PATH" ]; then
    export MASK_MODEL_PATH=$(yaml_get mask path)
    echo " MASK_MODEL_PATH=$MASK_MODEL_PATH"
  fi
  if [ -z "$SEMANTIC_MODEL_PATH" ]; then
    export SEMANTIC_MODEL_PATH=$(yaml_get semantic path)
    echo " SEMANTIC_MODEL_PATH=$SEMANTIC_MODEL_PATH"
  fi
  if [ -z "$GAUGE_MODEL_PATH" ]; then
    export GAUGE_MODEL_PATH=$(yaml_get gauge path)
    echo " GAUGE_MODEL_PATH=$GAUGE_MODEL_PATH"
  fi
  # OCR predictors (ocr-dist)
  if [ -z "$LOC_MODEL_PATH" ]; then
    export LOC_MODEL_PATH=$(yaml_get loc path)
    echo " LOC_MODEL_PATH=$LOC_MODEL_PATH"
  fi
  if [ -z "$OCR_CONFIG" ]; then
    export OCR_CONFIG=$(yaml_get ocr config)
    echo " OCR_CONFIG=$OCR_CONFIG"
  fi
  if [ -z "$BRACKETS_CONFIG" ]; then
    export BRACKETS_CONFIG=$(yaml_get brackets config)
    echo " BRACKETS_CONFIG=$BRACKETS_CONFIG"
  fi
fi
# ── Download model files from HuggingFace ──
hf_download() {
local subdir="$1"
local relpath="$2"
local dest="$MODELS_BASE/$subdir/$relpath"
if [ -z "$relpath" ]; then return; fi
if [ -e "$dest" ]; then
echo " $subdir/$relpath already present, skipping."
return
fi
echo " Downloading $subdir/$relpath ..."
mkdir -p "$(dirname "$dest")"
if curl -sfL $CURL_PROXY \
-H "Authorization: Bearer $HF_TOKEN" \
-o "$dest" \
"https://huggingface.co/${HF_REPO}/resolve/main/${subdir}/${relpath}?download=true"; then
echo " $subdir/$relpath downloaded OK."
else
rm -f "$dest"
# Retry with .pt extension (some .state.yaml best: fields omit the extension)
if curl -sfL $CURL_PROXY \
-H "Authorization: Bearer $HF_TOKEN" \
-o "${dest}.pt" \
"https://huggingface.co/${HF_REPO}/resolve/main/${subdir}/${relpath}.pt?download=true"; then
echo " $subdir/$relpath.pt downloaded OK (extension fallback)."
else
rm -f "${dest}.pt"
echo " WARNING: Failed to download $subdir/$relpath"
fi
fi
}
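# Usage sketch: hf_download <repo-subdir> <path-relative-to-subdir>, e.g.
#   hf_download starry-dist layout/20230101/.state.yaml   # hypothetical path
# fetches https://huggingface.co/k-l-lambda/starry/resolve/main/starry-dist/layout/20230101/.state.yaml
# into $MODELS_BASE/starry-dist/layout/20230101/.state.yaml unless it already exists.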
# ── Set predictor addresses (internal ZMQ) ──
export PREDICTOR_LAYOUT="${PREDICTOR_LAYOUT:-tcp://127.0.0.1:12022}"
export PREDICTOR_GAUGE="${PREDICTOR_GAUGE:-tcp://127.0.0.1:12023}"
export PREDICTOR_GAUGE_RENDERER="${PREDICTOR_GAUGE_RENDERER:-tcp://127.0.0.1:15656}"
export PREDICTOR_MASK="${PREDICTOR_MASK:-tcp://127.0.0.1:12024}"
export PREDICTOR_SEMANTIC="${PREDICTOR_SEMANTIC:-tcp://127.0.0.1:12025}"
export PREDICTOR_LOC="${PREDICTOR_LOC:-tcp://127.0.0.1:12026}"
export PREDICTOR_OCR="${PREDICTOR_OCR:-tcp://127.0.0.1:12027}"
export PREDICTOR_BRACKETS="${PREDICTOR_BRACKETS:-tcp://127.0.0.1:12028}"
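# The ${VAR:-default} form means any address already set in the environment wins;
# presumably these endpoints are shared by the Node services and the
# supervisord-managed Python predictors, so override them together if needed.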
# ── Download models & start ML predictors (background) ──
# Runs in background so Node services and nginx can start immediately.
(
if [ -n "$HF_TOKEN" ] && [ -f "$MODELS_YAML" ]; then
echo "Downloading model files from HuggingFace (background)..."
# Download bdtopo ONNX models
if [ -n "$BDTOPO_MODEL_PATH" ] && [ ! -f "$BDTOPO_MODEL_PATH" ]; then
_bdtopo_path=$(yaml_get bdtopo path)
_bdtopo_dir="$MODELS_BASE/starry-dist/$_bdtopo_path"
mkdir -p "$_bdtopo_dir"
sed -n '/^ files:/,/^[^ ]/{ /^ *- /p }' "$MODELS_YAML" | sed 's/^ *- *//' | while read -r FILE; do
hf_download "starry-dist" "$_bdtopo_path/$FILE"
done
fi
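    # Each PyTorch model directory is expected to ship a .state.yaml describing its
    # checkpoint. A hypothetical example of the two fields parsed below ("best:" and
    # "subs:"; the real files may contain more):
    #   best: epoch-0123.chkpt
    #   subs:
    #    - encoder
    #    #- decoder    <- commented entries are skipped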
    # Download PyTorch model directories (starry-dist)
    for model_var in LAYOUT_MODEL_PATH MASK_MODEL_PATH SEMANTIC_MODEL_PATH GAUGE_MODEL_PATH; do
      model_path="${!model_var}"
      if [ -n "$model_path" ]; then
        hf_download "starry-dist" "$model_path/.state.yaml"
        state_yaml="$MODELS_BASE/starry-dist/$model_path/.state.yaml"
        if [ -f "$state_yaml" ]; then
          # Model weight file is in "best:" field (not "file:")
          model_file=$(sed -n 's/^best: *//p' "$state_yaml" | head -1)
          if [ -n "$model_file" ]; then
            hf_download "starry-dist" "$model_path/$model_file"
          fi
          # Sub-models listed under "subs:" (skip commented lines starting with #)
          sed -n '/^subs:/,/^[a-z]/{ /^[^#]*- /p }' "$state_yaml" | sed 's/^ *- *//' | while read -r sub; do
            if [ -n "$sub" ]; then
              hf_download "starry-dist" "$model_path/$sub/.state.yaml"
              sub_state="$MODELS_BASE/starry-dist/$model_path/$sub/.state.yaml"
              if [ -f "$sub_state" ]; then
                sub_file=$(sed -n 's/^best: *//p' "$sub_state" | head -1)
                if [ -n "$sub_file" ]; then
                  hf_download "starry-dist" "$model_path/$sub/$sub_file"
                fi
              fi
            fi
          done
        fi
      fi
    done
    # Download LOC model (a single file, not a directory)
    if [ -n "$LOC_MODEL_PATH" ]; then
      hf_download "ocr-dist" "$LOC_MODEL_PATH"
    fi
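    # The OCR/brackets config yamls are assumed to reference their weight/asset
    # files via keys ending in "_path", e.g. (hypothetical):
    #   weights_path: ./brackets.onnx
    #   charset_path: charset.txt
    # The loop below pulls every such referenced file next to the config.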
    # Download OCR/TF config yamls and all referenced files
    for config_var in OCR_CONFIG BRACKETS_CONFIG; do
      config_path="${!config_var}"
      if [ -n "$config_path" ]; then
        hf_download "ocr-dist" "$config_path"
        config_file="$MODELS_BASE/ocr-dist/$config_path"
        if [ -f "$config_file" ]; then
          _dir=$(dirname "$config_path")
          # Download all *_path values referenced in the yaml
          grep '_path:' "$config_file" | sed 's/^[^:]*: *//' | while read -r ref_path; do
            if [ -n "$ref_path" ]; then
              # Resolve ./ relative paths
              ref_path="${ref_path#./}"
              hf_download "ocr-dist" "${_dir:+$_dir/}$ref_path"
            fi
          done
        fi
      fi
    done
echo "Model download complete."
fi
  # Start ML predictors after models are downloaded
  if [ -f /home/node/app/supervisord.conf ] && [ -d /home/node/app/backend/python-services ]; then
    if [ -n "$LAYOUT_MODEL_PATH" ]; then
      echo "Starting Python ML predictors via supervisord..."
      supervisord -c /home/node/app/supervisord.conf
    else
      echo "Skipping ML predictors (no model paths configured)."
    fi
  fi
) &
# ── Run database migrations ──
echo 'Running database migrations...'
cd /home/node/app/backend/omr-service
DB_HOST=127.0.0.1 DB_PORT=5432 DB_NAME=starry_omr DB_USER=node DB_PASSWORD= \
  npx tsx src/db/migrate.ts 2>&1 || echo 'Warning: Migration failed or skipped'
# Seed example score data + pre-computed regulation solutions
if [ -f /home/node/app/seed-data.sql.gz ]; then
  echo 'Loading seed data (example score + regulation solutions)...'
  zcat /home/node/app/seed-data.sql.gz | \
    /usr/lib/postgresql/15/bin/psql -h 127.0.0.1 -p 5432 -U node -d starry_omr 2>&1 | tail -1
fi
# Extract pre-generated upload images (page sources + mask + background)
if [ -f /home/node/app/seed-uploads.tar.gz ]; then
  echo 'Extracting seed upload images...'
  mkdir -p /tmp/starry-uploads  # ensure the target exists; harmless if the image already created it
  tar xzf /home/node/app/seed-uploads.tar.gz -C /tmp/starry-uploads/
fi
# ── Start cluster-server (port 3999) ──
echo 'Starting cluster-server on port 3999...'
cd /home/node/app/backend/cluster-server
PORT=3999 node dist/src/main.js &
# ── Start omr-service (port 3080) ──
echo 'Starting omr-service on port 3080...'
cd /home/node/app/backend/omr-service
export PORT=3080
export DB_HOST=127.0.0.1
export DB_PORT=5432
export DB_NAME=starry_omr
export DB_USER=node
export DB_PASSWORD=
export REGULATION_ENABLED=${REGULATION_ENABLED:-true}
npx tsx src/index.ts &
# Wait for omr-service to be ready
echo 'Waiting for omr-service...'
for i in $(seq 1 30); do
  if curl -sf http://127.0.0.1:3080/health > /dev/null 2>&1; then
    echo 'omr-service is ready.'
    break
  fi
  sleep 1
done
# ── Start nginx (port 7860, foreground) ──
echo 'Starting nginx on port 7860...'
exec nginx -g 'daemon off;'