|
|
#!/usr/bin/env bash |
|
|
set -euo pipefail |
|
|
|
|
|
show_help() { |
|
|
cat <<'EOF' |
|
|
Usage: refresh_legal_data.sh --file PATH --code CODE [options] |
|
|
|
|
|
Steps: |
|
|
1. python manage.py load_legal_document --file <PATH> --code <CODE> |
|
|
2. python scripts/generate_embeddings.py --model legal |
|
|
3. python scripts/build_faiss_index.py --model legal |
|
|
|
|
|
Options: |
|
|
--file PATH PDF/DOCX file to ingest (required unless --skip-ingest) |
|
|
--code CODE Document code (required unless --skip-ingest) |
|
|
--skip-ingest Skip step 1 and only regenerate embeddings/indexes |
|
|
--python BIN Python command to use (default: python3) |
|
|
--help Show this message |
|
|
EOF |
|
|
} |
|
|
|
|
|
PYTHON_BIN="python3" |
|
|
FILE_PATH="" |
|
|
DOC_CODE="" |
|
|
SKIP_INGEST=false |
|
|
|
|
|
while [[ $# -gt 0 ]]; do |
|
|
case "$1" in |
|
|
--file) |
|
|
FILE_PATH="$2" |
|
|
shift 2 |
|
|
;; |
|
|
--code) |
|
|
DOC_CODE="$2" |
|
|
shift 2 |
|
|
;; |
|
|
--skip-ingest) |
|
|
SKIP_INGEST=true |
|
|
shift |
|
|
;; |
|
|
--python) |
|
|
PYTHON_BIN="$2" |
|
|
shift 2 |
|
|
;; |
|
|
--help|-h) |
|
|
show_help |
|
|
exit 0 |
|
|
;; |
|
|
*) |
|
|
echo "Unknown option: $1" >&2 |
|
|
show_help |
|
|
exit 1 |
|
|
;; |
|
|
esac |
|
|
done |
|
|
|
|
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" |
|
|
PROJECT_ROOT="$SCRIPT_DIR/.." |
|
|
DJANGO_DIR="$PROJECT_ROOT/hue_portal" |
|
|
|
|
|
if [[ "$SKIP_INGEST" = false ]]; then |
|
|
if [[ -z "$FILE_PATH" || -z "$DOC_CODE" ]]; then |
|
|
echo "--file and --code are required unless --skip-ingest is set" >&2 |
|
|
exit 1 |
|
|
fi |
|
|
if [[ ! -f "$FILE_PATH" ]]; then |
|
|
echo "File not found: $FILE_PATH" >&2 |
|
|
exit 1 |
|
|
fi |
|
|
echo "[1/3] Ingesting document ${DOC_CODE} ..." |
|
|
pushd "$DJANGO_DIR" >/dev/null |
|
|
"$PYTHON_BIN" manage.py load_legal_document --file "$FILE_PATH" --code "$DOC_CODE" |
|
|
popd >/dev/null |
|
|
else |
|
|
echo "Skipping ingestion step." |
|
|
fi |
|
|
|
|
|
echo "[2/3] Generating embeddings (legal) ..." |
|
|
pushd "$PROJECT_ROOT" >/dev/null |
|
|
"$PYTHON_BIN" scripts/generate_embeddings.py --model legal |
|
|
popd >/dev/null |
|
|
|
|
|
echo "[3/3] Building FAISS index (legal) ..." |
|
|
pushd "$PROJECT_ROOT" >/dev/null |
|
|
"$PYTHON_BIN" scripts/build_faiss_index.py --model legal |
|
|
popd >/dev/null |
|
|
|
|
|
echo "Done. Updated artifacts located in backend/hue_portal/artifacts/faiss_indexes." |
|
|
|
|
|
|