#!/usr/bin/env bash
#
# Quantize Qwen2.5 models to W8A8 with llm-compressor (SmoothQuant + GPTQ),
# sweeping over model size, variant (base/instruct), and SmoothQuant strength.
# Each combination launches one run of pipeline_llmcompressor_oneshot.py.
#
# Requires: the ClearML virtualenv below, and the research pipelines checkout
# at /cache/git/research.
set -euo pipefail

# Activate the virtualenv that provides the pipeline's dependencies.
source ~/environments/clearml/bin/activate

# llm-compressor recipe template. The quoted 'EOF' delimiter keeps it literal;
# the placeholder "smoothing_strength: 0.0" is substituted per run below.
recipe_template=$(cat <<'EOF'
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.0
      mappings:
      - [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"]
      - [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      - [["re:.*down_proj"], "re:.*up_proj"]
    GPTQModifier:
      sequential_update: true
      dampening_frac: 0.05
      ignore: ["lm_head"]
      scheme: "W8A8"
      targets: "Linear"
      observer: "mse"
EOF
)

for size in 3B; do
  for version in base; do
    for sq in 0.9; do
      # Inject the current SmoothQuant strength. A parameter expansion replaces
      # the first (only) occurrence in-process — no echo|sed pipeline needed.
      recipe=${recipe_template/smoothing_strength: 0.0/smoothing_strength: ${sq}}

      if [[ "$version" == "base" ]]; then
        model="Qwen2.5-${size}"
      else
        model="Qwen2.5-${size}-Instruct"
      fi

      # Task prefix encodes the run configuration, e.g.
      #   Qwen2_5-3B__llm_compressor__calibration__mse__512__8196_damp005_sq09
      # (dots in the model name become underscores; "0.9" collapses to "09").
      prefix="${model//./_}__llm_compressor__calibration__mse__512__8196_damp005_sq${sq//0./0}"

      # NOTE(review): 8196 looks like a typo for 8192 (2^13), but it is baked
      # into existing task prefixes too — confirm before changing both places.
      python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
        --model-id "Qwen/${model}" \
        --project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
        --task-prefix "$prefix" \
        --recipe "$recipe" \
        --num-samples 512 \
        --max-seq-len 8196 \
        --tags "Qwen2.5" "W8A8" "calibration" "$size" "MSE" "SQ"
    done
  done
done