#!/usr/bin/env bash
# Queue lm-evaluation-harness (vLLM backend) evaluation runs on ClearML for the
# Qwen2.5 W4A16-quantized model family (base + Instruct, 0.5B through 32B).
#
# Requires:
#   - the clearml virtualenv at ~/environments/clearml
#   - the queue_lm_evaluation_harness_vllm.py automation script in /cache/git/research
#
# Each job is submitted to the oneshot-a100x1 queue; a failed submission is
# reported but does not stop the remaining submissions (matches the original
# sequential-script behavior).
set -uo pipefail   # deliberately no -e: keep queuing remaining models on failure

# shellcheck disable=SC1090 — venv path is machine-specific
source ~/environments/clearml/bin/activate

readonly QUEUE_SCRIPT=/cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py
readonly PROJECT_NAME="LLM quantization - W4A16/llmcompressor/Qwen2.5"
readonly QUEUE_NAME=oneshot-a100x1

# "<clearml-model-id> <model-name>" pairs; model-name builds the task name.
readonly -a JOBS=(
  "ece15d6b3b574a9ba3062f8922dda9d9 Qwen2.5-0.5B"
  "b3c075c7685a42c798ebe91bdb1bb140 Qwen2.5-0.5B-Instruct"
  "70d4376f576241eb9ca066ce2864c517 Qwen2.5-1.5B"
  "328b46a2fc3d431c95a8d8900f1b88ce Qwen2.5-1.5B-Instruct"
  "29785c1921db4680a1bcbb1c32b1bb0c Qwen2.5-3B"
  "bb496bef644e4336afba44944aa8e915 Qwen2.5-3B-Instruct"
  "7ab087ff9ada4033a52350078a2a665a Qwen2.5-7B"
  "0a0cf6bf30c94a3ea96babf1c92f23a2 Qwen2.5-7B-Instruct"
  "7da4e5e0e2db49c984d3e195ab09ae09 Qwen2.5-32B"
  "79b0a7340f2247068940085e66a7e17b Qwen2.5-32B-Instruct"
)

for job in "${JOBS[@]}"; do
  read -r model_id model_name <<<"$job"
  # All models share the same harness configuration; only id/name vary.
  if ! python "$QUEUE_SCRIPT" \
      --model-id "$model_id" \
      --clearml-model \
      --queue-name "$QUEUE_NAME" \
      --project-name "$PROJECT_NAME" \
      --task-name "${model_name}/openllm/vllm" \
      --benchmark-tasks openllm \
      --max-model-len 4096 \
      --add-bos-token \
      --batch-size auto \
      --enable-chunked-prefill \
      --gpu-memory-utilization 0.9 \
      --max-num-batched-tokens 256; then
    printf 'warn: failed to queue evaluation for %s\n' "$model_name" >&2
  fi
done