#!/usr/bin/env bash
# Launch ClearML llm-compressor one-shot W8A8 (SmoothQuant + GPTQ) quantization
# tasks for Qwen2.5 checkpoints, one task per (size, version, sq-strength) combo.

# Activate the ClearML virtualenv. Abort immediately if activation fails so we
# never launch the pipeline with the wrong (system) interpreter.
source ~/environments/clearml/bin/activate || {
  printf 'ERROR: failed to activate ~/environments/clearml\n' >&2
  exit 1
}


# YAML recipe template for llm-compressor's one-shot pipeline.
# `smoothing_strength: 0.0` is a placeholder that the loop below rewrites
# (via sed) to the sweep value — keep the literal "0.0" intact.
# Quoted 'EOF' delimiter: the body is taken verbatim, no shell expansion.
recipe_template=$(cat <<'EOF'
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.0
      mappings:
        - [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"]
        - [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
        - [["re:.*down_proj"], "re:.*up_proj"]
    GPTQModifier:
      sequential_update: true
      dampening_frac: 0.05
      ignore: ["lm_head"]
      scheme: "W8A8"
      targets: "Linear"
      observer: "mse"
EOF
)

# Sweep model sizes, checkpoint variants, and SmoothQuant strengths; launch
# one ClearML quantization task per combination.
for size in 3B; do
  for version in base; do
    for sq in 0.9; do

      # Instantiate the recipe: substitute the 0.0 placeholder with the
      # current SmoothQuant strength for this iteration.
      recipe=$(sed "s/smoothing_strength: 0.0/smoothing_strength: ${sq}/" <<<"$recipe_template")

      # Base checkpoints have no suffix; instruct-tuned ones carry "-Instruct".
      if [[ "$version" == "base" ]]; then
        model="Qwen2.5-${size}"
      else
        model="Qwen2.5-${size}-Instruct"
      fi

      # Task-name prefix: dots replaced by underscores in the model name, and
      # the sq value compacted ("0.9" -> "09").
      # NOTE(review): 8196 looks like a typo for 8192 — confirm before
      # changing; it is also baked into existing task names via this prefix.
      prefix="${model//./_}__llm_compressor__calibration__mse__512__8196_damp005_sq${sq//0./0}"

      python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
        --model-id "Qwen/${model}" \
        --project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
        --task-prefix "$prefix" \
        --recipe "${recipe}" \
        --num-samples 512 \
        --max-seq-len 8196 \
        --tags "Qwen2.5" "W8A8" "calibration" "$size" "MSE" "SQ"

    done
  done
done