#!/usr/bin/env bash
#
# Launches the llm-compressor one-shot pipeline (W4A16, GPTQ with an MSE
# observer) for every Qwen2.5 size listed below, in both the base and the
# Instruct variant.
#
# Requires:
#   - the virtualenv at ~/environments/clearml
#   - /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py
#
# A failing pipeline invocation does not stop the sweep; the remaining
# models are still submitted.

# Activate before enabling `set -u`: activate scripts may read unset variables.
source ~/environments/clearml/bin/activate || {
  printf 'error: could not activate ~/environments/clearml\n' >&2
  exit 1
}

# Fail loudly on misspelled/undefined variables from here on.
set -u

# Calibration settings; they are passed to the pipeline and also embedded in
# the task prefix so the two cannot drift apart.
readonly num_samples=512
readonly max_seq_len=8196

# Quantization recipe handed verbatim to the pipeline via --recipe.
# The quoted 'EOF' delimiter keeps the text literal (no shell expansion).
# NOTE(review): the nesting below was reconstructed; the copy this script was
# recovered from had lost all leading whitespace. In particular, confirm the
# level at which `observer` belongs (kept here in its original position,
# right after `targets`).
recipe_template=$(cat <<'EOF'
quant_stage:
  quant_modifiers:
    GPTQModifier:
      sequential_update: true
      dampening_frac: 0.1
      ignore: ["lm_head"]
      config_groups:
        group_0:
          weights:
            num_bits: 4
            type: "int"
            symmetric: true
            strategy: "group"
            group_size: 128
            actorder: "group"
          targets: ["Linear"]
          observer: "mse"
EOF
)

for size in 0.5B 1.5B 3B 7B 32B 72B; do
  for version in base instruct; do
    # Base checkpoints have no suffix; instruct ones end in "-Instruct".
    if [[ "${version}" == "base" ]]; then
      model="Qwen2.5-${size}"
    else
      model="Qwen2.5-${size}-Instruct"
    fi

    # Dots in the model name are replaced with underscores for the task
    # prefix; "damp01" mirrors dampening_frac: 0.1 in the recipe.
    prefix="${model//./_}__llm_compressor__calibration__mse__${num_samples}__${max_seq_len}__damp01"

    # The recipe variable is recipe_template; the previous "${recipe}" was
    # never defined, so an empty recipe was being passed.
    python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
      --model-id "Qwen/${model}" \
      --project-name "LLM quantization - W4A16/llmcompressor/Qwen2.5" \
      --task-prefix "${prefix}" \
      --recipe "${recipe_template}" \
      --num-samples "${num_samples}" \
      --max-seq-len "${max_seq_len}" \
      --tags "Qwen2.5" "W4A16" "calibration" "${size}" "MSE" "${version}"
  done
done