File size: 1,093 Bytes

1a552f6


# Load the environment under ~/environments/clearml. Abort if that fails so
# the python invocations later in this script never run with whatever
# interpreter happens to be on PATH instead.
source ~/environments/clearml/bin/activate || {
  printf 'error: failed to source ~/environments/clearml/bin/activate\n' >&2
  exit 1
}

# YAML quantization recipe text, stored verbatim in a shell variable.
# It declares one stage ("quant_stage") with a single GPTQModifier that:
#   - ignores "lm_head" and targets "Linear",
#   - uses 4-bit symmetric "int" weights, strategy "group" with group_size 128
#     and actorder "group",
#   - sets dampening_frac to 0.1 and the observer to "mse".
# Single quotes keep the text literal (no expansion), and the value carries no
# trailing newline.
recipe_template='quant_stage:
  quant_modifiers:
    GPTQModifier:
      sequential_update: true
      dampening_frac: 0.1
      ignore: ["lm_head"]
      config_groups:
        group_0:
          weights:
            num_bits: 4
            type: "int"
            symmetric: true
            strategy: "group"
            group_size: 128
            actorder: "group"
          targets: ["Linear"]
          observer: "mse"'

# Launch the llm-compressor one-shot pipeline script once per Qwen2.5
# checkpoint: every listed size, in both its base and instruct variants.
for size in 0.5B 1.5B 3B 7B 32B 72B; do
  for version in base instruct; do
    # Base checkpoints have no suffix; instruct ones end in "-Instruct".
    if [[ "${version}" == "base" ]]; then
      model="Qwen2.5-${size}"
    else
      model="Qwen2.5-${size}-Instruct"
    fi

    # Task prefix: the model name with every "." replaced by "_", followed by
    # a fixed suffix naming the run settings.
    prefix="${model//./_}__llm_compressor__calibration__mse__512__8196__damp01"

    # --recipe takes the text held in recipe_template (assigned earlier in
    # this script). The former "${recipe}" was never assigned, so the option
    # received an empty string.
    # NOTE(review): 8196 may be a typo for 8192 — confirm before changing,
    # since the same number is baked into the task prefix above.
    python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
      --model-id "Qwen/${model}" \
      --project-name "LLM quantization - W4A16/llmcompressor/Qwen2.5" \
      --task-prefix "${prefix}" \
      --recipe "${recipe_template}" \
      --num-samples 512 \
      --max-seq-len 8196 \
      --tags "Qwen2.5" "W4A16" "calibration" "${size}" "MSE" "${version}"
  done
done