Spaces:
Running
Running
"""Merge a LoRA adapter into its Qwen2.5-Coder base model on CPU.

Loads the base model in float32 (CPU-safe), attaches the LoRA adapter,
merges the adapter weights into the base weights, and saves the merged
model plus tokenizer to OUTPUT_DIR.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
LORA_PATH = "./Qwen2.5-Coder-0.5B-lora"
# Single source of truth for the save location (was repeated inline).
OUTPUT_DIR = "./Qwen2.5-Coder-0.5B-lora-merged"


def main() -> None:
    """Run the CPU-only LoRA merge and save the merged model + tokenizer."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # Some tokenizers ship without a pad token; fall back to EOS, but do
    # not clobber a pad token that is already configured.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # CPU ONLY -- no CUDA, no device_map. float32 avoids half-precision
    # kernels that are unsupported or very slow on CPU.
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )

    model = PeftModel.from_pretrained(base_model, LORA_PATH)

    print("Merging LoRA (this will take time on CPU)...")
    # merge_and_unload folds the adapter deltas into the base weights and
    # returns a plain transformers model (no PEFT wrapper).
    merged_model = model.merge_and_unload()

    merged_model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print("Merge complete")


# Guard the entry point so importing this module does not trigger a
# multi-minute model download and merge as a side effect.
if __name__ == "__main__":
    main()