File size: 2,604 Bytes
491a6e1
62d6ea4
43e4d79
 
 
f4c8c9a
 
 
62d6ea4
43e4d79
 
 
 
491a6e1
43e4d79
 
 
cef089a
 
 
62d6ea4
 
 
 
 
 
8c8fc9f
62d6ea4
 
 
8c8fc9f
cef089a
62d6ea4
 
cef089a
 
f4c8c9a
cef089a
491a6e1
 
f4c8c9a
43e4d79
 
 
f4c8c9a
491a6e1
 
f4c8c9a
9ad36d8
 
 
43e4d79
 
 
491a6e1
 
43e4d79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491a6e1
f4c8c9a
43e4d79
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Pin the Debian release explicitly: the Microsoft apt source configured later
# in this file targets Debian 12 (bookworm), while the plain "3.11-slim" tag
# follows whichever Debian release is current.
FROM python:3.11-slim-bookworm

# -----------------------
# ✅ Environment variables
# -----------------------
# Build-time only: keeps apt/debconf from prompting during the RUN steps below
# without leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration. CHROMA_ROOT and ENV are read by the generated start
# script; the remaining variables are presumably consumed by the application
# code (not visible in this file).
ENV PYTHONUNBUFFERED=1 \
    TESSERACT_CMD=/usr/bin/tesseract \
    CHROMA_DIR=/data/chroma \
    CHROMA_ROOT=/data/chroma \
    RAG_PDF_DIR=/app/pdfs \
    PYTHONPATH=/app \
    ENV=prod

# -----------------------
# 🧩 System dependencies
# -----------------------
# Installs the OS packages (ODBC, ffmpeg, poppler, tesseract), then registers
# the Microsoft apt repository and installs the SQL Server ODBC driver.
# The signing key is downloaded to a file before being dearmored instead of
# being piped into gpg: the default /bin/sh has no pipefail, so a failed curl
# inside a pipe would not abort the build under `set -e`.
# The apt lists are removed in the same layer to keep the image small.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        curl \
        ffmpeg \
        gnupg2 \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
        unixodbc \
        unixodbc-dev \
    ; \
    mkdir -p /etc/apt/keyrings; \
    curl -fsSL -o /tmp/microsoft.asc https://packages.microsoft.com/keys/microsoft.asc; \
    gpg --dearmor -o /etc/apt/keyrings/microsoft.gpg /tmp/microsoft.asc; \
    rm -f /tmp/microsoft.asc; \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/mssql-release.list; \
    apt-get update; \
    ACCEPT_EULA=Y apt-get install -y --no-install-recommends msodbcsql17; \
    mkdir -p /data/chroma; \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# -----------------------
# 🧩 Python dependencies
# -----------------------
# requirements.txt is copied on its own, ahead of the rest of the source tree,
# so source-only changes do not invalidate the dependency layers.
# --no-cache-dir on every pip call keeps pip's download cache out of the image.
RUN python -m pip install --no-cache-dir --upgrade pip
COPY requirements.txt /app/
RUN python -m pip install --no-cache-dir -r requirements.txt

# ✅ Compatibility fix for embeddings: installed after requirements.txt so these
# pinned versions replace whatever versions the requirements step installed.
RUN python -m pip install --no-cache-dir sentence-transformers==2.2.2 huggingface-hub==0.24.5

# -----------------------
# 📦 Copy application code
# -----------------------
# Copies the whole build context into /app. This runs after the dependency
# installs so that source-only changes do not invalidate those layers.
# NOTE(review): confirm a .dockerignore excludes .git, local virtualenvs, and
# any secrets — it is not visible from this file.
COPY . /app

# -----------------------
# ✅ Auto-ingest script
# -----------------------
# Generates /app/start.sh, the container entry script. At container start it:
#   1. defaults CHROMA_ROOT to /data/chroma and exports it, so the script does
#      not abort under `set -u` when the variable is unset;
#   2. runs `python -m ragg.ingest_all` if any of ${CHROMA_ROOT}/{low,mid,high}
#      is missing or empty (a failing ingestion only logs a warning);
#   3. replaces itself with gunicorn bound to 0.0.0.0:7860.
# printf '%s\n' writes one script line per argument; unlike `echo` with "\n"
# escapes, its output does not depend on which shell executes this instruction.
# Single quotes keep every ${...} and $(...) literal, so they are evaluated
# when the container starts rather than at build time.
RUN printf '%s\n' \
        '#!/usr/bin/env bash' \
        'set -euo pipefail' \
        'export CHROMA_ROOT="${CHROMA_ROOT:-/data/chroma}"' \
        'echo "== Container start =="' \
        'echo "ENV=${ENV:-dev}"' \
        'echo "CHROMA_ROOT=${CHROMA_ROOT}"' \
        'mkdir -p "${CHROMA_ROOT}"' \
        '_need_ingest=0' \
        'for level in low mid high; do' \
        '  lvl_dir="${CHROMA_ROOT}/${level}"' \
        '  if [ ! -d "$lvl_dir" ] || [ -z "$(ls -A "$lvl_dir" 2>/dev/null || true)" ]; then' \
        '    _need_ingest=1' \
        '  fi' \
        'done' \
        'if [ "${_need_ingest}" -eq 1 ]; then' \
        '  echo "No Chroma data found → running ingestion..."' \
        '  python -m ragg.ingest_all || echo "WARNING: ingestion returned non-zero exit"' \
        'else' \
        '  echo "Chroma already present → skipping ingestion."' \
        'fi' \
        'exec gunicorn --workers 2 --threads 4 --timeout 120 -b 0.0.0.0:7860 verification:app' \
        > /app/start.sh \
    && chmod +x /app/start.sh

# Documents the port gunicorn binds to in start.sh (0.0.0.0:7860).
EXPOSE 7860

# -----------------------
# ✅ Final command
# -----------------------
# Exec form: start.sh is launched directly as the container's main process and
# finishes by exec-ing gunicorn.
# NOTE(review): no USER instruction is visible in this file, so the container
# presumably runs as root — confirm whether a non-root user is feasible.
CMD ["/app/start.sh"]