# Qwen2.5-0.5B-quantized.w8a8 / quantize_qwen2.5_w8a8.sh
# Uploaded by alexmarques using huggingface_hub (commit 1a552f6, verified; 3.79 kB)
# Activate the ClearML virtual environment used to launch the pipeline tasks.
source ~/environments/clearml/bin/activate

# --- Qwen2.5-7B (base): W8A8 one-shot quantization with MSE observers --------
size="7B"
model="Qwen2.5-${size}"
# Task prefix: dots in the model name become underscores, followed by run
# metadata (num samples / max seq len).
# NOTE(review): "8196" looks like a typo for 8192, but it is baked into the
# task prefixes and --max-seq-len of every run, so it is kept as-is.
prefix="${model//./_}__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
    --model-id "Qwen/${model}" \
    --project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
    --task-prefix "$prefix" \
    --recipe "/network/alexandre/quantization/recipe_w8a8_mse_damp01.yaml" \
    --num-samples 512 \
    --max-seq-len 8196 \
    --tags "Qwen2.5" "W8A8" "calibration" "$size" "MSE"
size="72B"
model="Qwen2.5-${size}"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--oneshot-queue oneshot-a100x4 \
--evaluation-queue oneshot-a100x4 \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse_damp01.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE"
model="Qwen2.5-${size}-Instruct"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--oneshot-queue oneshot-a100x4 \
--evaluation-queue oneshot-a100x4 \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse_damp01.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE" "Instruct"
# Earlier runs (recipe without damp01), disabled with a here-doc fed to the
# null command. The quoted delimiter ('END') makes the body fully literal:
# the original bare `<<END` had no explicit command and still performed
# parameter expansion on $model/$prefix/$size even though the text was
# discarded. The disabled content is preserved verbatim below.
: <<'END'
for size in 0.5B 1.5B 3B 7B
do
model="Qwen2.5-${size}"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE"
model="Qwen2.5-${size}-Instruct"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE" "Instruct"
done
for size in 32B 72B
do
model="Qwen2.5-${size}"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--oneshot-queue oneshot-a100x2 \
--evaluation-queue oneshot-a100x2 \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE"
model="Qwen2.5-${size}-Instruct"
prefix="${model//./_}""__llm_compressor__calibration__mse__512__8196"
python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
--model-id "Qwen/"$model \
--project-name "LLM quantization - W8A8/llmcompressor/Qwen2.5" \
--task-prefix $prefix \
--oneshot-queue oneshot-a100x2 \
--evaluation-queue oneshot-a100x2 \
--recipe "/network/alexandre/quantization/recipe_w8a8_mse.yaml" \
--num-samples 512 \
--max-seq-len 8196 \
--tags "Qwen2.5" "W8A8" "calibration" $size "MSE" "Instruct"
done
END