# evaluate_qwen2.5_w4a16.sh — Qwen2.5 W4A16 evaluation queueing script
# Uploaded via huggingface_hub by alexmarques (commit 1a552f6, verified)
#!/usr/bin/env bash
# Queue lm-evaluation-harness (vLLM backend) ClearML tasks for the
# Qwen2.5 W4A16 model family on the openllm benchmark suite.
#
# Each entry below pairs a ClearML model ID with the model name that
# becomes the task-name prefix. Jobs are submitted in the same order as
# the original script (0.5B → 32B, base then Instruct).
set -euo pipefail

# Virtualenv providing the ClearML queueing script's dependencies.
source ~/environments/clearml/bin/activate

readonly QUEUE_SCRIPT=/cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py
readonly QUEUE_NAME=oneshot-a100x1
readonly PROJECT_NAME="LLM quantization - W4A16/llmcompressor/Qwen2.5"

# "<clearml-model-id>:<model-name>" pairs.
readonly -a MODELS=(
  "ece15d6b3b574a9ba3062f8922dda9d9:Qwen2.5-0.5B"
  "b3c075c7685a42c798ebe91bdb1bb140:Qwen2.5-0.5B-Instruct"
  "70d4376f576241eb9ca066ce2864c517:Qwen2.5-1.5B"
  "328b46a2fc3d431c95a8d8900f1b88ce:Qwen2.5-1.5B-Instruct"
  "29785c1921db4680a1bcbb1c32b1bb0c:Qwen2.5-3B"
  "bb496bef644e4336afba44944aa8e915:Qwen2.5-3B-Instruct"
  "7ab087ff9ada4033a52350078a2a665a:Qwen2.5-7B"
  "0a0cf6bf30c94a3ea96babf1c92f23a2:Qwen2.5-7B-Instruct"
  "7da4e5e0e2db49c984d3e195ab09ae09:Qwen2.5-32B"
  "79b0a7340f2247068940085e66a7e17b:Qwen2.5-32B-Instruct"
)

for entry in "${MODELS[@]}"; do
  model_id=${entry%%:*}    # part before the first ':'
  model_name=${entry#*:}   # part after the first ':'
  python "$QUEUE_SCRIPT" \
    --model-id "$model_id" \
    --clearml-model \
    --queue-name "$QUEUE_NAME" \
    --project-name "$PROJECT_NAME" \
    --task-name "${model_name}/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256
done