Oviya committed on
Commit
62d6ea4
·
1 Parent(s): f4c8c9a
Files changed (1) hide show
  1. Dockerfile +14 -18
Dockerfile CHANGED
@@ -1,46 +1,42 @@
1
  FROM python:3.11-slim
 
2
  ENV DEBIAN_FRONTEND=noninteractive \
3
  PYTHONUNBUFFERED=1 \
4
- # Tesseract path for pytesseract (your code reads this)
5
  TESSERACT_CMD=/usr/bin/tesseract \
6
- # Default Chroma persistence (your code also reads CHROMA_DIR)
7
- CHROMA_DIR=/data/chroma
8
 
9
  # System deps + Microsoft key via keyring (for msodbcsql17)
10
  RUN set -eux; \
11
  apt-get update; \
12
  apt-get install -y --no-install-recommends \
13
- curl ca-certificates gnupg2 apt-transport-https \
14
- unixodbc unixodbc-dev \
 
 
 
 
15
  ffmpeg \
16
- # ---- Added for pdf2image + pytesseract ----
17
- poppler-utils \ # provides pdftoppm / pdftocairo
18
- tesseract-ocr \ # OCR engine
19
- tesseract-ocr-eng # English language data (pulled with tesseract on many distros, kept explicit)
20
  ; \
21
  mkdir -p /etc/apt/keyrings; \
22
- curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \
23
- | gpg --dearmor -o /etc/apt/keyrings/microsoft.gpg; \
24
- echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/debian/12/prod bookworm main" \
25
- > /etc/apt/sources.list.d/mssql-release.list; \
26
  apt-get update; \
27
  ACCEPT_EULA=Y apt-get install -y msodbcsql17; \
28
- # Create a writable place for Chroma persistence
29
  mkdir -p /data/chroma; \
30
  rm -rf /var/lib/apt/lists/*
31
 
32
  WORKDIR /app
33
 
34
- # (Optional but helpful) make sure pip is recent and faster
35
  RUN python -m pip install --upgrade pip
36
-
37
  COPY requirements.txt /app/
38
  RUN pip install --no-cache-dir -r requirements.txt
39
 
40
- # Your code
41
  COPY . /app
42
 
43
  EXPOSE 7860
44
 
45
- # Gunicorn entrypoint stays the same
46
  CMD ["gunicorn","--workers","2","--threads","4","--timeout","120","-b","0.0.0.0:7860","verification:app"]
 
1
  FROM python:3.11-slim
2
+
3
  ENV DEBIAN_FRONTEND=noninteractive \
4
  PYTHONUNBUFFERED=1 \
 
5
  TESSERACT_CMD=/usr/bin/tesseract \
6
+ CHROMA_DIR=/data/chroma \
7
+ PYTHONPATH=/app
8
 
9
  # System deps + Microsoft key via keyring (for msodbcsql17)
10
  RUN set -eux; \
11
  apt-get update; \
12
  apt-get install -y --no-install-recommends \
13
+ curl \
14
+ ca-certificates \
15
+ gnupg2 \
16
+ apt-transport-https \
17
+ unixodbc \
18
+ unixodbc-dev \
19
  ffmpeg \
20
+ poppler-utils \
21
+ tesseract-ocr \
22
+ tesseract-ocr-eng \
 
23
  ; \
24
  mkdir -p /etc/apt/keyrings; \
25
+ curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/keyrings/microsoft.gpg; \
26
+ echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/mssql-release.list; \
 
 
27
  apt-get update; \
28
  ACCEPT_EULA=Y apt-get install -y msodbcsql17; \
 
29
  mkdir -p /data/chroma; \
30
  rm -rf /var/lib/apt/lists/*
31
 
32
  WORKDIR /app
33
 
 
34
  RUN python -m pip install --upgrade pip
 
35
  COPY requirements.txt /app/
36
  RUN pip install --no-cache-dir -r requirements.txt
37
 
 
38
  COPY . /app
39
 
40
  EXPOSE 7860
41
 
 
42
  CMD ["gunicorn","--workers","2","--threads","4","--timeout","120","-b","0.0.0.0:7860","verification:app"]