# File size: 5,057 Bytes
#
# 1a552f6

# Invokes queue_lm_evaluation_harness_vllm.py once per model in the table
# below. Every invocation passes the same queue, project, benchmark and vLLM
# flags; only --model-id and --task-name vary, so those two values live in a
# table instead of ten copy-pasted commands.
#
# Needs bash (uses arrays and `local`).
# Exit status: 0 when every invocation succeeded; 1 when the clearml
# environment could not be activated or at least one invocation failed.

#######################################
# Run the queueing script for a single model.
# Arguments: $1 - value passed to --model-id
#            $2 - model name; the task name becomes "<name>/openllm/vllm"
# Returns:   exit status of the python invocation
#######################################
_submit_openllm_eval() {
  local model_id=$1
  local model_name=$2

  # Flag order is kept exactly as in the original hand-written commands.
  python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id "$model_id" \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W4A16/llmcompressor/Qwen2.5" \
    --task-name "${model_name}/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256
}

#######################################
# Activate the clearml environment and run the queueing script for every
# model in the table.
# Outputs:   diagnostics on stderr when activation or an invocation fails
# Returns:   0 if all invocations succeeded, 1 otherwise
#######################################
queue_qwen25_openllm_evals() {
  # One entry per invocation: "<model id> <model name>".
  # NOTE(review): the list goes from 7B straight to 32B - confirm that no
  # 14B entry is intended.
  local -a models=(
    'ece15d6b3b574a9ba3062f8922dda9d9 Qwen2.5-0.5B'
    'b3c075c7685a42c798ebe91bdb1bb140 Qwen2.5-0.5B-Instruct'
    '70d4376f576241eb9ca066ce2864c517 Qwen2.5-1.5B'
    '328b46a2fc3d431c95a8d8900f1b88ce Qwen2.5-1.5B-Instruct'
    '29785c1921db4680a1bcbb1c32b1bb0c Qwen2.5-3B'
    'bb496bef644e4336afba44944aa8e915 Qwen2.5-3B-Instruct'
    '7ab087ff9ada4033a52350078a2a665a Qwen2.5-7B'
    '0a0cf6bf30c94a3ea96babf1c92f23a2 Qwen2.5-7B-Instruct'
    '7da4e5e0e2db49c984d3e195ab09ae09 Qwen2.5-32B'
    '79b0a7340f2247068940085e66a7e17b Qwen2.5-32B-Instruct'
  )
  local -a failed=()
  local entry model_id model_name

  # Without the environment, `python` would resolve to whatever happens to be
  # on PATH, so stop before running anything.
  if ! source ~/environments/clearml/bin/activate; then
    printf 'error: could not activate ~/environments/clearml/bin/activate; nothing was run\n' >&2
    return 1
  fi

  for entry in "${models[@]}"; do
    model_id=${entry%% *}
    model_name=${entry#* }
    # Keep going after a failure (as the original sequence of commands did),
    # but remember it so the overall exit status reflects it.
    if ! _submit_openllm_eval "$model_id" "$model_name"; then
      printf 'warning: invocation failed for %s (model id %s)\n' \
        "$model_name" "$model_id" >&2
      failed+=("$model_name")
    fi
  done

  if (( ${#failed[@]} > 0 )); then
    printf 'error: %d of %d invocations failed: %s\n' \
      "${#failed[@]}" "${#models[@]}" "${failed[*]}" >&2
    return 1
  fi
}

queue_qwen25_openllm_evals