# Activate the ClearML evaluation environment.
source ~/environments/clearml/bin/activate

# Queue an Open LLM Leaderboard (lm-evaluation-harness) run on vLLM for the
# given model, targeting the given ClearML queue.
queue_eval() {
    local model=$1
    local queue=$2
    python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
        --model-id "Qwen/${model}" \
        --queue-name "${queue}" \
        --project-name "LLM reference/vllm" \
        --task-name "${model}/openllm" \
        --benchmark-tasks openllm \
        --max-model-len 4096 \
        --add-bos-token \
        --batch-size auto \
        --enable-chunked-prefill \
        --gpu-memory-utilization 0.9 \
        --max-num-batched-tokens 256
}

# Base and Instruct variants of the smaller models go to the single-A100 queue.
for size in 0.5B 1.5B 3B 7B
do
    queue_eval "Qwen2.5-${size}" oneshot-a100x1
    queue_eval "Qwen2.5-${size}-Instruct" oneshot-a100x1
done

# 32B variants go to the two-A100 queue.
queue_eval "Qwen2.5-32B" oneshot-a100x2
queue_eval "Qwen2.5-32B-Instruct" oneshot-a100x2

# 72B variants go to the four-A100 queue.
queue_eval "Qwen2.5-72B" oneshot-a100x4
queue_eval "Qwen2.5-72B-Instruct" oneshot-a100x4