#!/usr/bin/env bash
#
# Container entrypoint.
#   1. Resolves CHROMA_ROOT (default: /data/chroma) and makes sure it exists.
#   2. Runs `python -m ragg.ingest_all` when any of the low/mid/high level
#      directories under CHROMA_ROOT is missing or empty.
#   3. Replaces this shell with gunicorn serving verification:app on :7860.
#
# Environment:
#   ENV          - only echoed at startup (shown as "dev" when unset).
#   CHROMA_ROOT  - root directory holding one sub-directory per level.
set -euo pipefail

# Resolve the default once. Applying it only inside the echo would leave the
# variable unset for the commands below, which is fatal under `set -u`.
# Exported so child processes see the same root this script checks
# (presumably ragg.ingest_all reads it - TODO confirm).
export CHROMA_ROOT="${CHROMA_ROOT:-/data/chroma}"

#######################################
# Report whether a directory contains at least one entry.
# Arguments: $1 - directory path
# Returns:   0 if the path is a directory with any entry (dotfiles included),
#            1 if it is missing, not a directory, unreadable, or empty.
#######################################
dir_has_entries() (
  # Subshell body: the shopt changes below never leak into the caller.
  # nullglob -> an unmatched glob expands to nothing instead of itself.
  # dotglob  -> hidden files count as content (same coverage as `ls -A`).
  shopt -s nullglob dotglob
  entries=("$1"/*)
  ((${#entries[@]} > 0))
)

echo "== Container start =="
echo "ENV=${ENV:-dev}"
echo "CHROMA_ROOT=${CHROMA_ROOT}"

# Ensure Chroma root exists
mkdir -p -- "${CHROMA_ROOT}"

# Ingestion is needed as soon as one level directory is missing or empty,
# so stop checking at the first such level.
need_ingest=0
for level in low mid high; do
  if ! dir_has_entries "${CHROMA_ROOT}/${level}"; then
    need_ingest=1
    break
  fi
done

if ((need_ingest)); then
  echo "No (or empty) Chroma data found → running ingestion..."
  # Ingest PDFs from /app/pdfs/{low,mid,high} into ${CHROMA_ROOT}/{low,mid,high}
  # Deliberately best-effort: a failed ingestion is reported on stderr but
  # does not prevent the API from starting.
  python -m ragg.ingest_all \
    || echo "WARNING: ingestion returned non-zero exit" >&2
else
  echo "Chroma already present → skipping ingestion."
fi

# Start the API. `exec` replaces this shell with gunicorn.
exec gunicorn \
  --workers 2 \
  --threads 4 \
  --timeout 120 \
  -b 0.0.0.0:7860 \
  verification:app