# Queue an lm-evaluation-harness (vLLM backend) run on ClearML for a
# quantized Qwen2.5-72B-Instruct model.
#
# NOTE(review): the original fused the `source` and `python` commands into a
# single invocation, so the activate script received the python command line
# as positional arguments and the evaluation script never ran. They must be
# two separate commands. A stray trailing `<` (redirection with no target,
# a syntax error) was also removed.

# Activate the virtualenv that provides the clearml client.
source ~/environments/clearml/bin/activate

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
  --model-id 0fe5857173ac484a89316214b14fcf96 \
  --clearml-model \
  --queue-name oneshot-a100x2 \
  --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
  --task-name "Qwen2.5-72B-Instruct/openllm/vllm" \
  --benchmark-tasks openllm \
  --max-model-len 4096 \
  --add-bos-token \
  --batch-size auto \
  --enable-chunked-prefill \
  --gpu-memory-utilization 0.9 \
  --max-num-batched-tokens 256