File size: 9,939 Bytes
6f1c297
 
 
 
 
 
 
a523941
6f1c297
 
 
 
 
 
 
 
 
 
 
504345b
6f1c297
 
2b7aae2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197a18e
 
2b7aae2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197a18e
2b7aae2
 
197a18e
2b7aae2
 
 
6a8cad3
 
 
 
 
 
 
 
 
 
2b7aae2
 
 
7b97a14
 
 
 
 
 
 
 
 
2b7aae2
7b97a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b7aae2
7b97a14
 
 
 
 
2b7aae2
7b97a14
2b7aae2
fc19dcf
 
2b7aae2
7b97a14
2b7aae2
fc19dcf
 
7b97a14
 
 
 
fc19dcf
7b97a14
 
 
 
 
 
2b7aae2
 
7b97a14
2b7aae2
fc19dcf
 
 
 
 
 
 
7b97a14
 
fc19dcf
 
 
 
 
 
 
 
 
 
7b97a14
fc19dcf
7b97a14
 
 
2b7aae2
7b97a14
 
2b7aae2
7b97a14
 
 
 
 
 
 
 
2b7aae2
7b97a14
2b7aae2
6f1c297
 
a523941
 
6f1c297
 
95c4896
 
 
 
 
8edd547
 
3b2dc51
 
 
 
1b06006
 
6f1c297
 
a523941
6f1c297
 
 
 
a523941
6f1c297
 
 
 
a523941
6f1c297
2b7aae2
6f1c297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#!/bin/bash
# Container entrypoint: starts PostgreSQL, resolves and downloads ML models,
# launches the Node services and finally runs nginx in the foreground.
# Required environment: PGDATA (PostgreSQL data directory).
set -e

echo '=== STARRY HF Space Entrypoint ==='

# ── Start PostgreSQL ──
echo 'Starting PostgreSQL...'
# PGDATA is quoted so a path containing spaces stays one argument, and the
# :? guard stops with a clear message when it is unset or empty rather than
# handing pg_ctl a malformed command line.
/usr/lib/postgresql/15/bin/pg_ctl \
  -D "${PGDATA:?PGDATA must point to the PostgreSQL data directory}" \
  -l /home/node/postgresql.log start -o "-p 5432"

# Poll up to 15 times, one second apart, until the server accepts connections.
pg_ready=0
for i in {1..15}; do
  if /usr/lib/postgresql/15/bin/pg_isready -h 127.0.0.1 -p 5432 -q 2>/dev/null; then
    echo 'PostgreSQL is ready.'
    pg_ready=1
    break
  fi
  echo "  Waiting for PostgreSQL... attempt $i/15"
  sleep 1
done
if [ "$pg_ready" -ne 1 ]; then
  # Make the timeout visible; the database commands below decide whether the
  # script can actually continue.
  echo 'Warning: PostgreSQL did not report ready after 15 attempts (see /home/node/postgresql.log)' >&2
fi

# Create database
# createdb only runs when the catalog query does not print a row containing "1".
/usr/lib/postgresql/15/bin/psql -h 127.0.0.1 -p 5432 -d postgres -tc "SELECT 1 FROM pg_database WHERE datname='starry_omr'" | grep -q 1 || \
  /usr/lib/postgresql/15/bin/createdb -h 127.0.0.1 -p 5432 starry_omr

# ── Model path resolution from models.yaml ──
MODELS_BASE="/home/node/app/models"
MODELS_YAML="$MODELS_BASE/starry-dist/models.yaml"
HF_REPO="k-l-lambda/starry"

# Optional proxy flags for curl. Kept as a plain string because every curl
# call expands $CURL_PROXY unquoted so it splits into "-x" and the proxy URL
# (and vanishes entirely when empty).
CURL_PROXY=""
if [ -n "$https_proxy" ]; then
  CURL_PROXY="-x $https_proxy"
fi

# Download models.yaml from HuggingFace if not present locally
if [ ! -f "$MODELS_YAML" ] && [ -n "$HF_TOKEN" ]; then
  echo "Downloading models.yaml from HuggingFace..."
  mkdir -p "$MODELS_BASE/starry-dist"
  # CURL_PROXY is intentionally word-split here.
  # shellcheck disable=SC2086
  if ! curl -sfL $CURL_PROXY \
    -H "Authorization: Bearer $HF_TOKEN" \
    -o "$MODELS_YAML" \
    "https://huggingface.co/${HF_REPO}/resolve/main/starry-dist/models.yaml?download=true"; then
    # Drop whatever a failed or interrupted transfer left behind, so a
    # truncated models.yaml is neither parsed below nor treated as
    # "already present" on the next start.
    rm -f "$MODELS_YAML"
    echo "Warning: Failed to download models.yaml (token may lack access)"
  fi
fi

# Parse models.yaml and set env vars (only if not already set)
if [ -f "$MODELS_YAML" ]; then
  echo "Parsing model paths from models.yaml..."

  # Helper: print the value of "<field>:" found under the top-level
  # "<section>:" mapping of $MODELS_YAML.
  # Globals:   MODELS_YAML (read)
  # Arguments: $1 - section name (a key starting in column 0)
  #            $2 - field name (a key indented by exactly two spaces)
  # Outputs:   the first matching value on stdout; nothing when the section,
  #            the field or the file is missing.
  # Returns:   0 in all of those cases, so callers running under `set -e`
  #            are not aborted by a missing entry.
  # Section and field are compared as literal strings (never spliced into a
  # regex), and the value is printed without surrounding blanks, a trailing
  # CR, or one pair of enclosing quotes, so it can be joined into a path.
  yaml_get() {
    [ -r "$MODELS_YAML" ] || return 0
    awk -v section="$1" -v field="$2" '
      # A non-indented, non-comment line opens a new top-level section.
      /^[^ \t\r#]/ { inside = (index($0, section ":") == 1); next }
      inside && index($0, "  " field ":") == 1 {
        # Skip the two-space indent, the field name and the colon.
        value = substr($0, length(field) + 4)
        sub(/^[ \t]+/, "", value)
        sub(/[ \t\r]+$/, "", value)
        first = substr(value, 1, 1)
        # \042 is a double quote, \047 a single quote.
        if (length(value) >= 2 && (first == "\042" || first == "\047") &&
            substr(value, length(value)) == first) {
          value = substr(value, 2, length(value) - 2)
        }
        print value
        exit
      }
    ' "$MODELS_YAML"
  }

  # Export the variable named by $1 from the "<$2>.<$3>" entry of models.yaml,
  # unless the environment already supplies a non-empty value for it. The
  # resulting value is echoed either way it was just resolved (even if empty).
  _export_from_yaml() {
    local var_name="$1"
    [ -z "${!var_name}" ] || return 0
    export "$var_name=$(yaml_get "$2" "$3")"
    echo "  $var_name=${!var_name}"
  }

  # bdtopo (ONNX model for regulation): the full path is assembled from the
  # section's "path" and "default" entries, and only when both are non-empty.
  if [[ -z "$BDTOPO_MODEL_PATH" ]]; then
    _bdtopo_path="$(yaml_get bdtopo path)"
    _bdtopo_default="$(yaml_get bdtopo default)"
    if [[ -n "$_bdtopo_path" && -n "$_bdtopo_default" ]]; then
      BDTOPO_MODEL_PATH="$MODELS_BASE/starry-dist/$_bdtopo_path/$_bdtopo_default"
      export BDTOPO_MODEL_PATH
      echo "  BDTOPO_MODEL_PATH=$BDTOPO_MODEL_PATH"
    fi
  fi

  # PyTorch predictors (starry-dist)
  _export_from_yaml LAYOUT_MODEL_PATH layout path
  _export_from_yaml MASK_MODEL_PATH mask path
  _export_from_yaml SEMANTIC_MODEL_PATH semantic path
  _export_from_yaml GAUGE_MODEL_PATH gauge path

  # OCR predictors (ocr-dist)
  _export_from_yaml LOC_MODEL_PATH loc path
  _export_from_yaml OCR_CONFIG ocr config
  _export_from_yaml BRACKETS_CONFIG brackets config
fi

# ── Download model files from HuggingFace ──
#######################################
# Fetch one file of the HuggingFace repo into the local models tree.
# Globals:   MODELS_BASE, HF_REPO, HF_TOKEN, CURL_PROXY (all read)
# Arguments: $1 - top-level directory inside the repo
#            $2 - file path relative to $1; an empty value is a no-op
# Outputs:   progress and warning lines on stdout
# Returns:   always 0, so a failed download is reported as a WARNING line
#            instead of aborting a caller that runs under `set -e`.
#######################################
hf_download() {
  local subdir="$1"
  local relpath="$2"
  local dest="$MODELS_BASE/$subdir/$relpath"
  local url="https://huggingface.co/${HF_REPO}/resolve/main/${subdir}/${relpath}"
  local suffix

  if [ -z "$relpath" ]; then return 0; fi

  # An earlier run may have stored the file under the ".pt" fallback name;
  # count that as present too, otherwise it is fetched again on every start.
  for suffix in "" ".pt"; do
    if [ -e "${dest}${suffix}" ]; then
      echo "  $subdir/$relpath$suffix already present, skipping."
      return 0
    fi
  done

  echo "  Downloading $subdir/$relpath ..."
  mkdir -p "$(dirname "$dest")"

  # Try the exact name first, then retry with a .pt extension (some
  # .state.yaml best: fields omit the extension). Each attempt writes to a
  # ".part" file that is renamed only after curl succeeds, so an interrupted
  # transfer can never be mistaken for a complete file by the check above.
  for suffix in "" ".pt"; do
    # CURL_PROXY is intentionally word-split into "-x" and the proxy URL.
    # shellcheck disable=SC2086
    if curl -sfL $CURL_PROXY \
      -H "Authorization: Bearer $HF_TOKEN" \
      -o "${dest}${suffix}.part" \
      "${url}${suffix}?download=true" \
      && mv -f "${dest}${suffix}.part" "${dest}${suffix}"; then
      if [ -z "$suffix" ]; then
        echo "  $subdir/$relpath downloaded OK."
      else
        echo "  $subdir/$relpath.pt downloaded OK (extension fallback)."
      fi
      return 0
    fi
    rm -f "${dest}${suffix}.part"
  done

  echo "  WARNING: Failed to download $subdir/$relpath"
  return 0
}

# ── Set predictor addresses (internal ZMQ) ──
# Give every predictor endpoint its loopback default when the variable is
# unset or empty, then export the whole set at once.
: "${PREDICTOR_LAYOUT:=tcp://127.0.0.1:12022}"
: "${PREDICTOR_GAUGE:=tcp://127.0.0.1:12023}"
: "${PREDICTOR_GAUGE_RENDERER:=tcp://127.0.0.1:15656}"
: "${PREDICTOR_MASK:=tcp://127.0.0.1:12024}"
: "${PREDICTOR_SEMANTIC:=tcp://127.0.0.1:12025}"
: "${PREDICTOR_LOC:=tcp://127.0.0.1:12026}"
: "${PREDICTOR_OCR:=tcp://127.0.0.1:12027}"
: "${PREDICTOR_BRACKETS:=tcp://127.0.0.1:12028}"
export PREDICTOR_LAYOUT PREDICTOR_GAUGE PREDICTOR_GAUGE_RENDERER \
  PREDICTOR_MASK PREDICTOR_SEMANTIC PREDICTOR_LOC PREDICTOR_OCR \
  PREDICTOR_BRACKETS

# ── Download models & start ML predictors (background) ──
# Runs in background so Node services and nginx can start immediately.
# Everything happens inside one subshell: model files are fetched first (only
# when HF_TOKEN is set and models.yaml exists), then supervisord is launched.
(
  if [ -n "$HF_TOKEN" ] && [ -f "$MODELS_YAML" ]; then
    echo "Downloading model files from HuggingFace (background)..."

    # Download bdtopo ONNX models
    # Only when the resolved model file is not on disk yet. Every "- item"
    # listed under an indented "files:" key of models.yaml is fetched into
    # the bdtopo directory.
    if [ -n "$BDTOPO_MODEL_PATH" ] && [ ! -f "$BDTOPO_MODEL_PATH" ]; then
      _bdtopo_path=$(yaml_get bdtopo path)
      _bdtopo_dir="$MODELS_BASE/starry-dist/$_bdtopo_path"
      mkdir -p "$_bdtopo_dir"

      sed -n '/^  files:/,/^[^ ]/{ /^ *- /p }' "$MODELS_YAML" | sed 's/^ *- *//' | while read -r FILE; do
        hf_download "starry-dist" "$_bdtopo_path/$FILE"
      done
    fi

    # Download PyTorch model directories (starry-dist)
    for model_var in LAYOUT_MODEL_PATH MASK_MODEL_PATH SEMANTIC_MODEL_PATH GAUGE_MODEL_PATH; do
      # Indirect expansion reads the variable whose name is in model_var
      # without going through eval.
      model_path="${!model_var}"
      if [ -n "$model_path" ]; then
        hf_download "starry-dist" "$model_path/.state.yaml"

        state_yaml="$MODELS_BASE/starry-dist/$model_path/.state.yaml"
        if [ -f "$state_yaml" ]; then
          # Model weight file is in "best:" field (not "file:")
          model_file=$(sed -n 's/^best: *//p' "$state_yaml" | head -1)
          if [ -n "$model_file" ]; then
            hf_download "starry-dist" "$model_path/$model_file"
          fi
          # Sub-models listed under "subs:" (skip commented lines starting with #)
          # Each sub-model has its own .state.yaml whose "best:" entry names
          # the weight file to fetch.
          sed -n '/^subs:/,/^[a-z]/{ /^[^#]*- /p }' "$state_yaml" | sed 's/^ *- *//' | while read -r sub; do
            if [ -n "$sub" ]; then
              hf_download "starry-dist" "$model_path/$sub/.state.yaml"
              sub_state="$MODELS_BASE/starry-dist/$model_path/$sub/.state.yaml"
              if [ -f "$sub_state" ]; then
                sub_file=$(sed -n 's/^best: *//p' "$sub_state" | head -1)
                if [ -n "$sub_file" ]; then
                  hf_download "starry-dist" "$model_path/$sub/$sub_file"
                fi
              fi
            fi
          done
        fi
      fi
    done

    # Download LOC model (a single file, not a directory)
    if [ -n "$LOC_MODEL_PATH" ]; then
      hf_download "ocr-dist" "$LOC_MODEL_PATH"
    fi

    # Download OCR/TF config yamls and all referenced files
    for config_var in OCR_CONFIG BRACKETS_CONFIG; do
      config_path="${!config_var}"
      if [ -n "$config_path" ]; then
        hf_download "ocr-dist" "$config_path"
        config_file="$MODELS_BASE/ocr-dist/$config_path"
        if [ -f "$config_file" ]; then
          # Referenced files are resolved relative to the config's directory.
          _dir=$(dirname "$config_path")
          # Download all *_path values referenced in the yaml
          grep '_path:' "$config_file" | sed 's/^[^:]*: *//' | while read -r ref_path; do
            if [ -n "$ref_path" ]; then
              # Resolve ./ relative paths
              ref_path=$(echo "$ref_path" | sed 's|^\./||')
              hf_download "ocr-dist" "${_dir:+$_dir/}$ref_path"
            fi
          done
        fi
      fi
    done

    echo "Model download complete."
  fi

  # Start ML predictors after models are downloaded
  # Requires both the supervisord config and the python-services directory;
  # LAYOUT_MODEL_PATH serves as the "model paths are configured" indicator.
  if [ -f /home/node/app/supervisord.conf ] && [ -d /home/node/app/backend/python-services ]; then
    if [ -n "$LAYOUT_MODEL_PATH" ]; then
      echo "Starting Python ML predictors via supervisord..."
      supervisord -c /home/node/app/supervisord.conf
    else
      echo "Skipping ML predictors (no model paths configured)."
    fi
  fi
) &

# ── Run database migrations ──
echo 'Running database migrations...'
cd /home/node/app/backend/omr-service
# A failed migration is downgraded to a warning so startup can continue.
DB_HOST=127.0.0.1 DB_PORT=5432 DB_NAME=starry_omr DB_USER=node DB_PASSWORD= \
  npx tsx src/db/migrate.ts 2>&1 || echo 'Warning: Migration failed or skipped'

# Seed example score data + pre-computed regulation solutions
if [ -f /home/node/app/seed-data.sql.gz ]; then
  echo 'Loading seed data (example score + regulation solutions)...'
  # Only the last line of psql's combined output is shown.
  zcat /home/node/app/seed-data.sql.gz | \
    /usr/lib/postgresql/15/bin/psql -h 127.0.0.1 -p 5432 -U node -d starry_omr 2>&1 | tail -1
fi

# Extract pre-generated upload images (page sources + mask + background)
if [ -f /home/node/app/seed-uploads.tar.gz ]; then
  echo 'Extracting seed upload images...'
  # tar -C fails when the target directory is missing, which under `set -e`
  # would abort the entire entrypoint; make sure the directory exists first.
  mkdir -p /tmp/starry-uploads
  tar xzf /home/node/app/seed-uploads.tar.gz -C /tmp/starry-uploads/
fi

# ── Start cluster-server (port 3999) ──
echo 'Starting cluster-server on port 3999...'
cd /home/node/app/backend/cluster-server
# PORT is set for this one process only.
PORT=3999 node dist/src/main.js &

# ── Start omr-service (port 3080) ──
echo 'Starting omr-service on port 3080...'
cd /home/node/app/backend/omr-service
# These exports stay in the environment of everything started afterwards.
export PORT=3080
export DB_HOST=127.0.0.1
export DB_PORT=5432
export DB_NAME=starry_omr
export DB_USER=node
export DB_PASSWORD=
export REGULATION_ENABLED="${REGULATION_ENABLED:-true}"
npx tsx src/index.ts &

# Wait for omr-service to be ready
# Polls the /health endpoint up to 30 times, one second apart.
echo 'Waiting for omr-service...'
omr_ready=0
for _ in {1..30}; do
  if curl -sf http://127.0.0.1:3080/health > /dev/null 2>&1; then
    echo 'omr-service is ready.'
    omr_ready=1
    break
  fi
  sleep 1
done
if [ "$omr_ready" -ne 1 ]; then
  # Make the timeout visible in the logs; nginx is still started.
  echo 'Warning: omr-service did not answer /health after 30 attempts; starting nginx anyway' >&2
fi

# ── Start nginx (port 7860, foreground) ──
echo 'Starting nginx on port 7860...'
# exec replaces this shell with nginx.
exec nginx -g 'daemon off;'