Spaces:

binary1ne
/

vllm-llama2

Paused

App Files Files Community

vllm-llama2 / Dockerfile_2

binary1ne

Rename Dockerfile to Dockerfile_2

97f6eff verified 3 months ago

raw

history blame contribute delete

2.73 kB

	######################### BASE IMAGE #########################
	# Shared foundation: Ubuntu 22.04 plus the system toolchain and the uv
	# package manager. Stages that start `FROM base` inherit everything here.
	FROM ubuntu:22.04 AS base

	WORKDIR /workspace/

	# Build-time knobs: the Python version used for the virtualenv and the extra
	# package index consulted when resolving Python dependencies.
	ARG PYTHON_VERSION=3.12
	ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"

	# Start from an empty preload list; it is extended further down in this stage.
	ENV LD_PRELOAD=""

	# Install minimal dependencies and uv.
	# The apt cache directories are BuildKit cache mounts, so downloaded package
	# data is reused across builds without being written into an image layer.
	# The uv install script must be piped into `sh` with a real, unescaped pipe:
	# an escaped `\|` is handed to curl as a literal argument and the installer
	# never executes, leaving `uv` unavailable to the later RUN steps.
	RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
	    --mount=type=cache,target=/var/lib/apt,sharing=locked \
	    apt-get update -y \
	    && apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
	       gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 \
	    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
	    && curl -LsSf https://astral.sh/uv/install.sh | sh

	# Check out the vLLM sources at the pinned release tag.
	RUN git clone --branch v0.8.5.post1 https://github.com/vllm-project/vllm.git /workspace/vllm

	# Send C++ compiler invocations through ccache and keep its cache under
	# /root/.cache/ccache.
	ENV CCACHE_DIR=/root/.cache/ccache \
	    CMAKE_CXX_COMPILER_LAUNCHER=ccache

	# /root/.local/bin goes first on PATH (presumably where the uv installer
	# places its binary); the virtualenv and uv-managed Python live under /opt.
	ENV PATH="/root/.local/bin:$PATH" \
	    VIRTUAL_ENV="/opt/venv" \
	    UV_PYTHON_INSTALL_DIR=/opt/uv/python

	# Create the virtualenv, then put its bin directory ahead of everything else
	# so `python3` and installed console scripts resolve to it.
	RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
	ENV PATH="$VIRTUAL_ENV/bin:$PATH"

	# Install Python dependencies.
	# uv/pip configuration: a longer HTTP timeout, the extra index taken from the
	# PIP_EXTRA_INDEX_URL build argument (exported for both pip and uv), the
	# "unsafe-best-match" index strategy, and "copy" link mode.
	ENV UV_HTTP_TIMEOUT=500 \
	    PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} \
	    UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} \
	    UV_INDEX_STRATEGY="unsafe-best-match" \
	    UV_LINK_MODE="copy"

	# Upgrade pip inside the virtualenv, then install vLLM's CPU requirements.
	# uv's download cache is a BuildKit cache mount shared across builds.
	RUN --mount=type=cache,target=/root/.cache/uv \
	    uv pip install --upgrade pip \
	    && uv pip install -r /workspace/vllm/requirements/cpu.txt

	# Pinned Intel OpenMP runtime and Intel Extension for PyTorch.
	RUN --mount=type=cache,target=/root/.cache/uv \
	    uv pip install \
	        intel-openmp==2024.2.1 \
	        intel_extension_for_pytorch==2.6.0

	# Preload tcmalloc (from the libtcmalloc-minimal4 apt package) and libiomp5
	# (presumably shipped by the intel-openmp wheel) ahead of any previously
	# configured preload entries.
	ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD"

	# Disable core dumps for bash sessions that read root's ~/.bashrc.
	RUN echo 'ulimit -c 0' >> ~/.bashrc

	######################### BUILD IMAGE #########################
	# Builds the vLLM wheel from the sources cloned in the base stage.
	FROM base AS vllm-build

	# NOTE(review): GIT_REPO_CHECK is declared but not referenced by any
	# instruction visible in this stage.
	ARG GIT_REPO_CHECK=0

	# Support for building with non-AVX512 vLLM:
	#   docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
	# The build argument is re-exported as an environment variable so the build
	# step below can see it.
	ARG VLLM_CPU_DISABLE_AVX512
	ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}

	WORKDIR /workspace/vllm

	# Build-time Python requirements.
	RUN --mount=type=cache,target=/root/.cache/uv \
	    uv pip install -r /workspace/vllm/requirements/build.txt

	# Produce the wheel for the CPU target; the ccache directory is a cache mount
	# so compiled objects can be reused between builds.
	RUN --mount=type=cache,target=/root/.cache/uv \
	    --mount=type=cache,target=/root/.cache/ccache \
	    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel


	######################### RELEASE IMAGE #########################
	# Runtime image: the base stage plus the wheel built in vllm-build.
	FROM base AS vllm-openai

	WORKDIR /workspace/

	# The build stage's dist/ directory is bind-mounted for this step only, so
	# the wheel is installed without being copied into a layer of its own.
	RUN --mount=type=cache,target=/root/.cache/uv \
	    --mount=type=cache,target=/root/.cache/ccache \
	    --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
	    uv pip install dist/*.whl

	# Exec-form entrypoint: runs the vllm.entrypoints.openai.api_server module;
	# arguments given to `docker run` are appended to this command.
	ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]