#!/usr/bin/env bash
#
# Container entrypoint: download a BERT TensorFlow checkpoint archive, unpack
# it under MODEL_ROOT, normalise a few file names, sanity-check the result and
# finally replace this shell with the uvicorn API server.
#
# Environment:
#   WEIGHTS_URL_TAR_GZ  (required) direct link to a .tar.gz archive
#                       (a Dropbox link must end with dl=1).
#   PORT                (optional) port passed to uvicorn; defaults to 7860.
#
# Exports:
#   MODEL_DIR           directory that holds vocab.txt, config.json and the
#                       checkpoint files; inherited by the uvicorn process.
#
# Exit codes:
#   1  WEIGHTS_URL_TAR_GZ is unset/empty, or no temp file could be created
#   2  archive extraction failed
#   3  no vocab.txt found under MODEL_ROOT
#   4  vocab.txt, config.json or checkpoint missing from MODEL_DIR
#   5  no model.ckpt-*.index file
#   6  no model.ckpt-*.data-00000-of-00001 file
#   A failed download exits with curl's own status.

set -euo pipefail

MODEL_ROOT="/app/bert_tf"

# Print a "[fatal]" message on stderr and exit.
#   $1 - exit status, remaining arguments - message text
die() {
  local status=$1
  shift
  printf '[fatal] %s\n' "$*" >&2
  exit "$status"
}

# Succeed when the glob handed in by the caller matched at least one path.
# An unmatched glob is passed through literally, and that literal name does
# not exist, so testing the first argument is sufficient.
glob_matches() {
  [[ -e "$1" ]]
}

mkdir -p "$MODEL_ROOT"

if [[ -z "${WEIGHTS_URL_TAR_GZ:-}" ]]; then
  die 1 "Set WEIGHTS_URL_TAR_GZ to a DIRECT .tar.gz link (Dropbox must end with dl=1)"
fi

# Unpredictable temp name instead of a fixed /tmp path. The trap covers the
# failure paths; the success path deletes the archive explicitly further down
# because `exec` replaces the shell and EXIT traps never run after it.
archive="$(mktemp "${TMPDIR:-/tmp}/model.tar.gz.XXXXXX")" \
  || die 1 "could not create a temporary file for the download"
trap 'rm -f -- "$archive"' EXIT

echo "[start] downloading model…"
# --fail turns HTTP 4xx/5xx responses into a curl error instead of silently
# saving the server's error page as the "archive".
curl --fail -L "$WEIGHTS_URL_TAR_GZ" -o "$archive" || {
  status=$?
  printf '[fatal] download failed (curl exit %s)\n' "$status" >&2
  exit "$status"
}

echo "[start] extracting…"
tar -xzf "$archive" -C "$MODEL_ROOT" || die 2 "extract failed"
rm -f -- "$archive"

# ---- Find the directory that actually contains vocab.txt (handles any nesting) ----
# `|| true`: head exits after the first line, which can kill find with SIGPIPE;
# under pipefail that would otherwise abort the script.
found="$(find "$MODEL_ROOT" -maxdepth 4 -type f -name 'vocab.txt' | head -n1 || true)"
if [[ -z "$found" ]]; then
  printf '[fatal] %s\n' "vocab.txt not found under $MODEL_ROOT" >&2
  ls -R "$MODEL_ROOT" >&2
  exit 3
fi

model_dir="$(dirname -- "$found")"
# When vocab.txt sits in a vocab/ subdirectory that has no config of its own,
# the model directory is the parent. Without this step the vocab/vocab.txt
# normalisation below could never match and the run always ended in
# "missing config.json".
if [[ "${model_dir##*/}" == "vocab" \
  && ! -f "$model_dir/bert_config.json" \
  && ! -f "$model_dir/config.json" ]]; then
  model_dir="$(dirname -- "$model_dir")"
fi
# Assign first, export second, so a failing dirname is not masked by export.
MODEL_DIR="$model_dir"
export MODEL_DIR

# Normalize names expected by HF + TF
if [[ -f "$MODEL_DIR/bert_config.json" ]]; then
  cp "$MODEL_DIR/bert_config.json" "$MODEL_DIR/config.json"
fi
if [[ -d "$MODEL_DIR/vocab" && -f "$MODEL_DIR/vocab/vocab.txt" ]]; then
  mv "$MODEL_DIR/vocab/vocab.txt" "$MODEL_DIR/vocab.txt"
fi
if [[ -f "$MODEL_DIR/checkpoint.txt" ]]; then
  mv "$MODEL_DIR/checkpoint.txt" "$MODEL_DIR/checkpoint"
fi

echo "[debug] MODEL_DIR=$MODEL_DIR"
ls -l "$MODEL_DIR" | sed 's/^/[debug] /'

# Basic sanity
for required in vocab.txt config.json checkpoint; do
  [[ -f "$MODEL_DIR/$required" ]] || die 4 "missing $required"
done
glob_matches "$MODEL_DIR"/model.ckpt-*.index || die 5 "missing ckpt index"
glob_matches "$MODEL_DIR"/model.ckpt-*.data-00000-of-00001 || die 6 "missing ckpt data"

echo "[start] launching API…"
exec uvicorn app:app --host 0.0.0.0 --port "${PORT:-7860}"