#!/usr/bin/env bash
# Queue lm-evaluation-harness (vLLM backend) runs on ClearML for three
# Qwen2.5 W8A8 models. All runs share the same benchmark configuration
# (openllm tasks, 4096 ctx, chunked prefill); only the model id, GPU
# queue, and task name differ per run.
set -euo pipefail

# Activate the ClearML virtualenv. NOTE: this must be its own command —
# appending the python invocation after `activate` would pass it as
# arguments to the sourced script instead of executing it.
source ~/environments/clearml/bin/activate

readonly QUEUE_SCRIPT=/cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py
readonly PROJECT_NAME="LLM quantization - W8A8/llmcompressor/Qwen2.5"

#######################################
# Queue a single evaluation task.
# Globals:
#   QUEUE_SCRIPT (read), PROJECT_NAME (read)
# Arguments:
#   $1 - ClearML model id
#   $2 - ClearML queue name (GPU pool, e.g. oneshot-a100x2)
#   $3 - ClearML task name
#######################################
queue_eval() {
  local model_id=$1
  local queue_name=$2
  local task_name=$3
  python "$QUEUE_SCRIPT" \
    --model-id "$model_id" \
    --clearml-model \
    --queue-name "$queue_name" \
    --project-name "$PROJECT_NAME" \
    --task-name "$task_name" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256
}

# 7B fits on one A100; the 72B variants need a 2-GPU queue.
queue_eval 0909a970c53644f593c62b2e076ee763 oneshot-a100x1 "Qwen2.5-7B/openllm/vllm"
queue_eval 0323219f7a8b4e938c2583ab17851b91 oneshot-a100x2 "Qwen2.5-72B/openllm/vllm"
queue_eval 31c634f743fe458687556bc85a6aeb32 oneshot-a100x2 "Qwen2.5-72B-Instruct/openllm/vllm"