# Qwen2.5-0.5B-quantized.w8a8 / quantize_qwen2.5_w8a8.sh
# Uploaded by alexmarques using huggingface_hub (commit 1a552f6, verified; 3.79 kB)
# Activate the ClearML virtual environment used to launch the pipeline tasks.
source ~/environments/clearml/bin/activate

# --- Qwen2.5-7B (base): W8A8 one-shot quantization with MSE observers --------
size="7B"
model="Qwen2.5-${size}"
# Task prefix: dots in the model name become underscores, followed by run
# metadata (num samples / max seq len).
# NOTE(review): "8196" looks like a typo for 8192, but it is baked into the
# task prefixes and --max-seq-len of every run, so it is kept as-is.
prefix="${model//./_}__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
    --model-id "Qwen/${model}" \
    --project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
    --task-prefix "$prefix" \
    --recipe "/network/alexandre/quantization/recipe_w8a8_mse_damp01.yaml" \
    --num-samples 512 \
    --max-seq-len 8196 \
    --tags "Qwen2.5" "W8A8" "calibration" "$size" "MSE"
size="72B"
model="Qwen2.5-${size}"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--oneshot-queue oneshot-a100x4 \
--evaluation-queue oneshot-a100x4 \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse_damp01.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE"
model="Qwen2.5-${size}-Instruct"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--oneshot-queue oneshot-a100x4 \
--evaluation-queue oneshot-a100x4 \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse_damp01.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE" "Instruct"
# Earlier runs (recipe without damp01), disabled with a here-doc fed to the
# null command. The quoted delimiter ('END') makes the body fully literal:
# the original bare `<<END` had no explicit command and still performed
# parameter expansion on $model/$prefix/$size even though the text was
# discarded. The disabled content is preserved verbatim below.
: <<'END'
for size in 0.5B 1.5B 3B 7B
do
model="Qwen2.5-${size}"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE"
model="Qwen2.5-${size}-Instruct"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE" "Instruct"
done
for size in 32B 72B
do
model="Qwen2.5-${size}"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--oneshot-queue oneshot-a100x2 \
--evaluation-queue oneshot-a100x2 \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE"
model="Qwen2.5-${size}-Instruct"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--oneshot-queue oneshot-a100x2 \
--evaluation-queue oneshot-a100x2 \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE" "Instruct"
done
END