```bash
source ~/environments/clearml/bin/activate

# Quantization recipe: SmoothQuant followed by GPTQ (W8A8, MSE observer).
# The smoothing_strength placeholder (0.0) is substituted per run below.
recipe_template=$(cat <<'EOF'
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.0
      mappings:
        - [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"]
        - [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
        - [["re:.*down_proj"], "re:.*up_proj"]
    GPTQModifier:
      sequential_update: true
      dampening_frac: 0.05
      ignore: ["lm_head"]
      scheme: "W8A8"
      targets: "Linear"
      observer: "mse"
EOF
)

for size in 3B; do
  for version in base; do
    for sq in 0.9; do
      # Inject the SmoothQuant strength for this run into the recipe template.
      recipe=$(echo "$recipe_template" | sed "s/smoothing_strength: 0.0/smoothing_strength: ${sq}/")

      # Base vs. instruct checkpoint.
      if [ "$version" = "base" ]; then
        model="Qwen2.5-${size}"
      else
        model="Qwen2.5-${size}-Instruct"
      fi

      prefix="${model//./_}__llm_compressor__calibration__mse__512__8196_damp005_sq${sq//0./0}"

      python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
        --model-id "Qwen/${model}" \
        --project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
        --task-prefix "$prefix" \
        --recipe "${recipe}" \
        --num-samples 512 \
        --max-seq-len 8196 \
        --tags "Qwen2.5" "W8A8" "calibration" "$size" "MSE" "SQ"
    done
  done
done
```
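
For context, below is a minimal sketch of the kind of one-shot call the internal `pipeline_llmcompressor_oneshot.py` script presumably wraps, using llm-compressor's `oneshot` entrypoint directly with the same recipe. The calibration dataset (`open_platypus`), output directory, and import path are assumptions, not taken from the pipeline; the numeric parameters mirror the flags above.

```python
# Sketch only: applying an equivalent SmoothQuant + GPTQ W8A8 recipe with
# llm-compressor's oneshot API. Dataset and output_dir are placeholders.
from llmcompressor.transformers import oneshot

recipe = """
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.9
      mappings:
        - [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"]
        - [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
        - [["re:.*down_proj"], "re:.*up_proj"]
    GPTQModifier:
      sequential_update: true
      dampening_frac: 0.05
      ignore: ["lm_head"]
      scheme: "W8A8"
      targets: "Linear"
      observer: "mse"
"""

oneshot(
    model="Qwen/Qwen2.5-3B",
    dataset="open_platypus",        # placeholder calibration dataset
    recipe=recipe,
    num_calibration_samples=512,    # matches --num-samples 512
    max_seq_length=8196,            # matches --max-seq-len 8196
    output_dir="Qwen2.5-3B-W8A8",   # placeholder output location
)
```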