Kakashi-hatake committed on
Commit
8caf770
·
verified ·
1 Parent(s): 51c573a

added app file

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. app.py +135 -0
  3. requirements.txt +121 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+
7
+ WORKDIR /app
8
+
9
+ COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
+
12
+ COPY --chown=user . /app
13
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ import json, re
3
+ import os
4
+ import base64
5
+ import requests
6
+
7
+ from fastapi import FastAPI, UploadFile, File
8
+ from fastapi.responses import JSONResponse
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from PIL import Image
11
+
12
# ASGI application object; the FastAPI title is shown in the generated docs.
app = FastAPI(title="LLaMA 3.2 Vision Menu Extractor")

# Enable CORS for frontend interaction (Gradio/Spaces UI).
# Every origin, HTTP method and request header is accepted.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
21
+
22
+
23
# Chat-completions endpoint of the Hugging Face router.
API_URL = "https://router.huggingface.co/v1/chat/completions"

# Headers sent with every request to API_URL. The bearer token is read from
# the `access_token` environment variable when this module is imported, so a
# missing variable raises KeyError at start-up.
HEADERS = {
    "Authorization": "Bearer {}".format(os.environ["access_token"]),
    "Content-Type": "application/json",
}
28
+
29
# Instruction text sent to the vision model alongside the image. It is built
# line by line; the leading and trailing empty entries reproduce the newline
# right after the opening quotes and right before the closing quotes.
PROMPT = "\n".join(
    (
        "",
        "You are an AI assistant. Extract item names and their prices from the following image.",
        "",
        "Your task is to extract item names and their corresponding prices from the image provided.",
        "",
        "Return ONLY a clean JSON array in this format:",
        "[",
        '{"item": "<item_name>", "price": "<price>"},',
        "...",
        "]",
        "",
        "⚠️ Guidelines:",
        "- Do not include any explanation or text before/after the JSON.",
        "- Include only entries that have both item and price.",
        "- Preserve original spellings and formatting from the image.",
        "- If prices are written in ₹, Rs., or INR, keep the symbol as is.",
        "- Handle both packaged labels (like chips or snacks) and printed/handwritten menus.",
        "- If there are duplicates or unclear text, skip them.",
        "",
        "Only return the final JSON output, No explanation.",
        "",
        "Make sure each entry has both item and price, and preserve the original spelling.",
        "",
    )
)
52
+
53
def resize_image(image: Image.Image, max_size=(1024, 1024)) -> Image.Image:
    """Shrink *image* in place so it fits within *max_size* and return it.

    Args:
        image: The image to shrink; it is modified in place via ``thumbnail``.
        max_size: ``(width, height)`` bounding box passed to ``thumbnail``.

    Returns:
        The same image object that was passed in.
    """
    bounding_box = max_size
    image.thumbnail(bounding_box)  # mutates the image; has no useful return value
    return image
56
+
57
+
58
async def encode_image_to_data_url(file: UploadFile=File(...)) -> str:
    """Read an uploaded image and return it as a base64 JPEG data URL.

    The upload is decoded, rotated according to its EXIF orientation,
    converted to a JPEG-compatible colour mode, shrunk with ``resize_image``
    and re-encoded as JPEG at quality 80.

    Args:
        file: The uploaded image file.

    Returns:
        A ``data:image/jpeg;base64,...`` URL containing the re-encoded image.

    Raises:
        PIL.UnidentifiedImageError: If the upload is not a readable image.
    """
    # Local import: PIL is already a dependency of this module.
    from PIL import ImageOps

    image = Image.open(BytesIO(await file.read()))

    # Re-encoding drops EXIF metadata, so apply the orientation to the pixels
    # first; otherwise rotated phone photos reach the model sideways.
    image = ImageOps.exif_transpose(image)

    # JPEG cannot store alpha or palette data (RGBA, LA, P, ...); saving such
    # an image raises OSError, so normalise to RGB before encoding.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # Preprocessing
    image = resize_image(image)

    # Compress and convert to bytes
    buffered = BytesIO()
    image.save(buffered, quality=80, format="JPEG")
    image_bytes = buffered.getvalue()

    # Encode to base64
    base64_image = base64.b64encode(image_bytes).decode("utf-8")

    # The payload is always JPEG regardless of what was uploaded, so the MIME
    # type must say so rather than echoing the upload's content type.
    return f"data:image/jpeg;base64,{base64_image}"
77
+
78
@app.get("/")
def root():
    """Liveness endpoint: return a static message saying the API is running."""
    # NOTE(review): the message names "GLM 4.1V" while the extract endpoint
    # requests a Llama 4 Scout model - confirm whether the text should change.
    status = {"message": "GLM 4.1V API for menu extraction is running."}
    return status
81
+
82
@app.post("/extract/")
async def extract(file: UploadFile = File(...)):
    """Extract item names and prices from an uploaded menu or label image.

    The image is turned into a base64 data URL, sent together with ``PROMPT``
    to the chat-completions endpoint at ``API_URL``, and the first JSON array
    found in the model's reply is parsed and returned.

    Args:
        file: The uploaded image (multipart form field ``file``).

    Returns:
        A ``JSONResponse``:

        * 200 ``{"menu_items": [...]}`` on success.
        * 400 ``{"error": ...}`` when the image cannot be processed or the
          model request fails (network error, timeout, non-2xx status,
          unexpected response shape).
        * 404 ``{"error": ..., "model_response": ...}`` when the reply holds
          no JSON array.
        * 500 ``{"error": ..., "raw": ...}`` when the matched text is not
          valid JSON.
    """
    # Local import: only this handler needs asyncio.
    import asyncio

    try:
        # Convert uploaded image to base64 URL format
        image_data_url = await encode_image_to_data_url(file)

        # Create chat-style payload
        payload = {
            "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct:novita",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": PROMPT},
                        {"type": "image_url", "image_url": {"url": image_data_url}},
                    ],
                }
            ],
        }

        # requests is blocking: run it in a worker thread so the event loop
        # keeps serving other requests. The (connect, read) timeout stops a
        # stalled upstream from hanging this request forever.
        response = await asyncio.to_thread(
            requests.post,
            API_URL,
            headers=HEADERS,
            json=payload,
            timeout=(10, 120),
        )

        # Report upstream failures explicitly instead of letting them surface
        # later as a KeyError on the missing "choices" field.
        if not response.ok:
            raise RuntimeError(
                f"Model API request failed with HTTP {response.status_code}: "
                f"{response.text[:500]}"
            )

        result = response.json()
        print("result :", result)
        reply = result["choices"][0]["message"]["content"]

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)

    # A missing or non-text reply cannot contain a JSON array; treat it like
    # "nothing found" rather than letting re.search raise TypeError.
    match = None
    if isinstance(reply, str):
        match = re.search(r"\[\s*{.*?}\s*\]", reply, re.DOTALL)

    if match is None:
        return JSONResponse(
            status_code=404,
            content={"error": "No JSON array found in response", "model_response": reply},
        )

    json_str = match.group(0)
    try:
        items = json.loads(json_str)
    except json.JSONDecodeError:
        return JSONResponse(
            status_code=500,
            content={"error": "Failed to parse JSON", "raw": json_str},
        )
    return JSONResponse(content={"menu_items": items})
135
+
requirements.txt ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.9.0
2
+ aiofiles==24.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.12.15
5
+ aiosignal==1.4.0
6
+ annotated-types==0.7.0
7
+ anyio==4.9.0
8
+ async-timeout==4.0.3
9
+ attrs==25.3.0
10
+ Brotli==1.1.0
11
+ certifi==2025.7.14
12
+ charset-normalizer==3.4.2
13
+ click==8.2.1
14
+ dataclasses-json==0.6.7
15
+ dnspython==2.7.0
16
+ email_validator==2.2.0
17
+ exceptiongroup==1.3.0
18
+ fastapi==0.116.1
19
+ fastapi-cli==0.0.8
20
+ fastapi-cloud-cli==0.1.5
21
+ ffmpy==0.6.1
22
+ filelock==3.18.0
23
+ frozenlist==1.7.0
24
+ fsspec==2025.7.0
25
+ gradio==5.39.0
26
+ gradio_client==1.11.0
27
+ greenlet==3.2.3
28
+ groovy==0.1.2
29
+ h11==0.16.0
30
+ hf-xet==1.1.5
31
+ httpcore==1.0.9
32
+ httptools==0.6.4
33
+ httpx==0.28.1
34
+ httpx-sse==0.4.1
35
+ huggingface-hub==0.34.3
36
+ idna==3.10
37
+ Jinja2==3.1.6
38
+ jsonpatch==1.33
39
+ jsonpointer==3.0.0
40
+ langchain==0.3.27
41
+ langchain-community==0.3.27
42
+ langchain-core==0.3.72
43
+ langchain-ollama==0.3.6
44
+ langchain-text-splitters==0.3.9
45
+ langsmith==0.4.9
46
+ markdown-it-py==3.0.0
47
+ MarkupSafe==3.0.2
48
+ marshmallow==3.26.1
49
+ mdurl==0.1.2
50
+ mpmath==1.3.0
51
+ multidict==6.6.3
52
+ mypy_extensions==1.1.0
53
+ networkx==3.4.2
54
+ numpy==2.2.6
55
+ nvidia-cublas-cu12==12.6.4.1
56
+ nvidia-cuda-cupti-cu12==12.6.80
57
+ nvidia-cuda-nvrtc-cu12==12.6.77
58
+ nvidia-cuda-runtime-cu12==12.6.77
59
+ nvidia-cudnn-cu12==9.5.1.17
60
+ nvidia-cufft-cu12==11.3.0.4
61
+ nvidia-cufile-cu12==1.11.1.6
62
+ nvidia-curand-cu12==10.3.7.77
63
+ nvidia-cusolver-cu12==11.7.1.2
64
+ nvidia-cusparse-cu12==12.5.4.2
65
+ nvidia-cusparselt-cu12==0.6.3
66
+ nvidia-nccl-cu12==2.26.2
67
+ nvidia-nvjitlink-cu12==12.6.85
68
+ nvidia-nvtx-cu12==12.6.77
69
+ ollama==0.5.1
70
+ orjson==3.11.1
71
+ packaging==25.0
72
+ pandas==2.3.1
73
+ pillow==11.3.0
74
+ propcache==0.3.2
75
+ psutil==7.0.0
76
+ pydantic==2.11.7
77
+ pydantic-settings==2.10.1
78
+ pydantic_core==2.33.2
79
+ pydub==0.25.1
80
+ Pygments==2.19.2
81
+ python-dateutil==2.9.0.post0
82
+ python-dotenv==1.1.1
83
+ python-multipart==0.0.20
84
+ pytz==2025.2
85
+ PyYAML==6.0.2
86
+ regex==2025.7.34
87
+ requests==2.32.4
88
+ requests-toolbelt==1.0.0
89
+ rich==14.1.0
90
+ rich-toolkit==0.14.9
91
+ rignore==0.6.4
92
+ ruff==0.12.7
93
+ safehttpx==0.1.6
94
+ safetensors==0.5.3
95
+ semantic-version==2.10.0
96
+ sentry-sdk==2.34.1
97
+ shellingham==1.5.4
98
+ six==1.17.0
99
+ sniffio==1.3.1
100
+ SQLAlchemy==2.0.42
101
+ starlette==0.47.2
102
+ sympy==1.14.0
103
+ tenacity==9.1.2
104
+ tokenizers==0.21.4
105
+ tomlkit==0.13.3
106
+ torch==2.7.1
107
+ tqdm==4.67.1
108
+ transformers==4.55.0
109
+ triton==3.3.1
110
+ typer==0.16.0
111
+ typing-inspect==0.9.0
112
+ typing-inspection==0.4.1
113
+ typing_extensions==4.14.1
114
+ tzdata==2025.2
115
+ urllib3==2.5.0
116
+ uvicorn==0.35.0
117
+ uvloop==0.21.0
118
+ watchfiles==1.1.0
119
+ websockets==15.0.1
120
+ yarl==1.20.1
121
+ zstandard==0.23.0