New alora script (#24)
- Add script for running vLLM with aLoRA adapters (7f3e4f6bae45d03525f3fd4bb5a8a24e580265de)
Co-authored-by: Fred Reiss <[email protected]>
- run_vllm_alora.sh +48 -0
run_vllm_alora.sh
ADDED
@@ -0,0 +1,48 @@
#!/bin/bash

################################################################################
# Shell script that starts a copy of vLLM with a base model plus all the
# available aLoRA adapters in this repository.
#
# To run this script:
# 1. Create and activate a Python virtual environment using a tool such as
#    miniforge, uv, or venv.
# 2. Install the fork of vLLM that supports aLoRA on your machine
#    (`VLLM_USE_PRECOMPILED=1 pip install git+https://github.com/tdoublep/vllm.git@alora`)
# 3. Install the Hugging Face CLI (`pip install -U "huggingface_hub[cli]"`)
# 4. Download the intrinsics library by running:
#    hf download ibm-granite/rag-intrinsics-lib --local-dir ./rag-intrinsics-lib
# 5. Edit the constants BASE_MODEL_NAME, BASE_MODEL_ORG, and PORT as needed
# 6. Run this script from the root of your local copy of rag-intrinsics-lib.
################################################################################
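
# The discovery loop below expects the layout produced by step 4: one
# directory per intrinsic, with aLoRA weights nested under alora/<base model>.
# Illustrative sketch only; the adapter name "answerability" is an assumption,
# not a guaranteed entry in rag-intrinsics-lib:
#
#   ./answerability/alora/granite-3.3-8b-instruct/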

BASE_MODEL_NAME=granite-3.3-8b-instruct
BASE_MODEL_ORG=ibm-granite
PORT=55555

export VLLM_API_KEY=rag_intrinsics_1234
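# The key exported above is what vLLM will require clients to present as a
# bearer token; see the example request at the end of this script.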

# Find all aLoRA adapters for the target base model. Note that this can be
# edited to serve both aLoRA and LoRA adapters simultaneously.
ALORAS=""
for item in ./*; do
    # Remove the "./" prefix
    name=$(basename -- "${item}")
    if [ -d "./${name}/alora/${BASE_MODEL_NAME}" ]; then
        ALORAS+="${name}=./${name}/alora/${BASE_MODEL_NAME} "
    fi
done
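
# At this point ALORAS holds one space-separated name=path pair per adapter
# directory found above, e.g. (adapter name is hypothetical):
#   answerability=./answerability/alora/granite-3.3-8b-instruct ...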

CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
    --port ${PORT} \
    --gpu-memory-utilization 0.45 \
    --max-model-len 8192 \
    --enable-lora \
    --enable-activated-lora \
    --enable-prefix-caching \
    --max-lora-rank 64 \
    --lora-modules $ALORAS"

echo $CMD
$CMD
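
# Once the server is up, an adapter can be invoked by passing its name as the
# model in vLLM's OpenAI-compatible API. A minimal smoke test, assuming the
# loop above found an adapter named "answerability" (name and prompt are
# illustrative; the port and key literals mirror the constants above):
#
#   curl http://localhost:55555/v1/completions \
#     -H "Authorization: Bearer rag_intrinsics_1234" \
#     -H "Content-Type: application/json" \
#     -d '{"model": "answerability", "prompt": "Hello", "max_tokens": 16}'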