Update quantization/apply_gptq_save_marlin.py
Browse files
quantization/apply_gptq_save_marlin.py
CHANGED
|
@@ -46,7 +46,7 @@ if __name__ == "__main__":
|
|
| 46 |
device_map="auto")
|
| 47 |
model.quantize(examples)
|
| 48 |
|
| 49 |
-
gptq_save_dir =
|
| 50 |
print(f"Saving gptq model to {gptq_save_dir}")
|
| 51 |
model.save_pretrained(gptq_save_dir)
|
| 52 |
tokenizer.save_pretrained(gptq_save_dir)
|
|
@@ -55,14 +55,14 @@ if __name__ == "__main__":
|
|
| 55 |
gc.collect()
|
| 56 |
|
| 57 |
print("Reloading in marlin format")
|
| 58 |
-
|
| 59 |
marlin_model = AutoGPTQForCausalLM.from_quantized(
|
| 60 |
gptq_save_dir,
|
| 61 |
use_marlin=True,
|
| 62 |
device_map="auto")
|
| 63 |
|
| 64 |
print("Saving in marlin format")
|
| 65 |
-
marlin_model.save_pretrained(args.save_dir)
|
| 66 |
-
tokenizer.save_pretrained(args.save_dir)
|
| 67 |
|
| 68 |
shutil.rmtree(gptq_save_dir)
|
|
|
|
| 46 |
device_map="auto")
|
| 47 |
model.quantize(examples)
|
| 48 |
|
| 49 |
+
gptq_save_dir = "./tmp-gptq"
|
| 50 |
print(f"Saving gptq model to {gptq_save_dir}")
|
| 51 |
model.save_pretrained(gptq_save_dir)
|
| 52 |
tokenizer.save_pretrained(gptq_save_dir)
|
|
|
|
| 55 |
gc.collect()
|
| 56 |
|
| 57 |
print("Reloading in marlin format")
|
| 58 |
+
|
| 59 |
marlin_model = AutoGPTQForCausalLM.from_quantized(
|
| 60 |
gptq_save_dir,
|
| 61 |
use_marlin=True,
|
| 62 |
device_map="auto")
|
| 63 |
|
| 64 |
print("Saving in marlin format")
|
| 65 |
+
marlin_model.save_pretrained(args.save_dir)
|
| 66 |
+
tokenizer.save_pretrained(args.save_dir)
|
| 67 |
|
| 68 |
shutil.rmtree(gptq_save_dir)
|