Spaces:

binary1ne
/

vllm-llama2

Paused

binary1ne committed on Aug 12

Commit

28e546f

verified ·

1 Parent(s): d8f814e

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,14 +1,21 @@
-FROM vllm/vllm-openai:latest
-# Expose your desired port
 EXPOSE 7860
-# Environment variables for host/port
 ENV VLLM_HOST=0.0.0.0
 ENV VLLM_PORT=7860
-ENV VLLM_LOGGING_LEVEL=DEBUG
-ENV VLLM_DEVICE=cpu
-# Run vLLM with env-based host and port
-CMD ["vllm serve --model unsloth/llama-2-7b-bnb-4bit --host 0.0.0.0 --port 7860"]

+FROM python:3.12-slim
+# Install system dependencies. --no-install-recommends keeps the layer small,
+# and the apt lists are removed in the same layer that created them.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends git \
+    && rm -rf /var/lib/apt/lists/*
+# Install CPU-only PyTorch + vLLM
+RUN pip install --no-cache-dir torch==2.4.0 --index-url https://download.pytorch.org/whl/cpu
+# NOTE(review): vllm is unpinned, so pip may resolve a release whose torch
+# requirement differs from the pin above and replace the CPU wheel. Confirm
+# and pin a matching vllm version.
+RUN pip install --no-cache-dir vllm
+# Run as an unprivileged user with a writable home directory (model and
+# config caches are written under $HOME). All root-only steps are above.
+RUN useradd --create-home --uid 1000 user
+USER user
+ENV HOME=/home/user
+# Expose port
 EXPOSE 7860
+# Env variables
 ENV VLLM_HOST=0.0.0.0
 ENV VLLM_PORT=7860
+# HUGGING_FACE_HUB_TOKEN is intentionally not set here: ENV values are stored
+# in the image configuration, and a placeholder value would be exported as
+# the token at runtime. Supply it when the container starts instead
+# (e.g. a Space secret or `docker run -e HUGGING_FACE_HUB_TOKEN=...`).
+# Command to run vLLM on CPU. The model is passed as the positional argument
+# of `vllm serve`; `exec` replaces the shell so the server receives stop
+# signals directly, and the variables are quoted against word splitting.
+CMD ["sh", "-c", "exec vllm serve unsloth/llama-2-7b-bnb-4bit --device cpu --host \"$VLLM_HOST\" --port \"$VLLM_PORT\""]