Luigi committed on
Commit ab1f15c · 1 Parent(s): 0afdb56

Add script to efficiently train with unsloth

Files changed (1)
  1. train_with_unsloth.py +142 -0
train_with_unsloth.py ADDED
@@ -0,0 +1,142 @@
#! /usr/bin/env python3
import unsloth
from transformers import AutoTokenizer
from unsloth import FastLanguageModel
from transformers import DataCollatorForLanguageModeling
from transformers import TrainingArguments, Trainer
from transformers import pipeline
from datasets import load_dataset
import torch
import os
import wandb
from transformers.integrations import WandbCallback

PROJECT_NAME = 'SmolLM2-135M-Instruct-TaiwanChat'
BASE_MODEL_ID = "HuggingFaceTB/SmolLM2-135M-Instruct"
DATASET_ID = "yentinglin/TaiwanChat"
N_SAMPLES = -1  # -1 means "use the full training split" (see load_dataset below)
MAX_LEN = 256

# Tell wandb which project to use, and that you want to log your model
os.environ["WANDB_PROJECT"] = PROJECT_NAME
os.environ["WANDB_LOG_MODEL"] = "end"

# Detect GPU Type
device_str = 'cpu'
if torch.xpu.is_available():
    device_str = 'xpu'
elif torch.cuda.is_available():
    device_str = 'cuda'
print(f'Device is {device_str}')
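# Note: torch.xpu.is_available() assumes a PyTorch build with Intel XPU support;
# on builds without it, the torch.xpu attribute may be missing entirely.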

## Load with Unsloth's optimized API
# 1) Load quantized model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name      = BASE_MODEL_ID,
    max_seq_length  = MAX_LEN,
    dtype           = torch.float16,
    load_in_4bit    = True,
    full_finetuning = False,  # we will add LoRA adapters next
)
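# Note: load_in_4bit relies on bitsandbytes 4-bit (NF4) quantization, which in
# practice assumes a CUDA-capable GPU; on CPU or XPU you may need load_in_4bit=False.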

# 2) Prepare it for k-bit training (sets up layer norms, disables caching, etc.)
from peft import prepare_model_for_kbit_training
model = prepare_model_for_kbit_training(model)

# 3) Attach LoRA adapters on top of the quantized weights
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r              = 8,                     # low-rank dimension
    lora_alpha     = 16,                    # scaling
    target_modules = ["q_proj", "v_proj"],  # apply to attention
    bias           = "none",
    task_type      = TaskType.CAUSAL_LM,
    inference_mode = False,
)
model = get_peft_model(model, lora_config)

# Now `model` has ~1-2% trainable parameters (the LoRA adapters),
# and Trainer will no longer throw the "purely quantized" error.
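# Print the exact trainable-parameter count to confirm only the adapters train:
model.print_trainable_parameters()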

# Prepare the TaiwanChat Dataset
split = "train" if N_SAMPLES == -1 else f"train[:{N_SAMPLES}]"  # -1 selects the whole split
dataset = load_dataset(DATASET_ID, split=split)

# Preprocessing Function
def preprocess_examples(examples):
    # Each 'messages' entry is a list of {"role", "content"} dicts
    chats = examples["messages"]
    # Render into a single string via ChatML template
    text = tokenizer.apply_chat_template(chats, tokenize=False, add_generation_prompt=True)
    # Tokenize with truncation
    tokens = tokenizer(text, truncation=True, max_length=MAX_LEN)
    return {"input_ids": tokens["input_ids"],
            "attention_mask": tokens["attention_mask"]}

# Tokenization & Data Collator
tokenized_ds = dataset.map(
    preprocess_examples,
    batched=True,
    remove_columns=dataset.column_names,
)
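# Optional sanity check: decode one processed example to confirm the chat
# template rendered as expected before spending GPU time on training.
print(tokenizer.decode(tokenized_ds[0]["input_ids"]))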

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False
)
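# With mlm=False the collator builds causal-LM labels: it copies input_ids to
# labels and masks padding positions with -100 so they are ignored by the loss.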

training_args = TrainingArguments(
    output_dir=PROJECT_NAME,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    learning_rate=5e-5,
    num_train_epochs=3,
    fp16=(device_str == 'cuda'),
    bf16=(device_str == 'xpu'),
    logging_steps=1000,
    save_steps=5000,

    # ─── W&B integration ───
    logging_dir=f"{PROJECT_NAME}/logs",    # where to store TensorBoard/W&B logs
    report_to=["wandb"],                   # enable W&B reporting
    run_name=PROJECT_NAME,                 # name this run in your W&B project

    push_to_hub=True,
    hub_model_id=f"Luigi/{PROJECT_NAME}",  # Hub repo that push_to_hub targets
    gradient_checkpointing=True,
)
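# Effective batch size is per_device_train_batch_size * gradient_accumulation_steps
# = 1 * 16 = 16 sequences per optimizer step (per device).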

# Enable gradient checkpointing on the model
model.gradient_checkpointing_enable()
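# Gradient checkpointing trades compute for memory by recomputing activations in
# the backward pass; gradient_checkpointing=True above already asks Trainer to
# enable it, so this explicit call is a harmless belt-and-braces.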

# Training with Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds,
    data_collator=data_collator,
    callbacks=[WandbCallback],  # ensure the W&B callback is attached
)
trainer.train(resume_from_checkpoint=False)

# Save Model & Tokenizer Locally
trainer.save_model(PROJECT_NAME)
trainer.push_to_hub()  # pushes to the hub_model_id set in TrainingArguments
tokenizer.save_pretrained(PROJECT_NAME)
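# Note: trainer.save_model() on a PEFT-wrapped model writes only the LoRA adapter
# (adapter weights + adapter_config), not merged base weights, which is why the
# reload below goes through peft's AutoPeftModelForCausalLM.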

# 1) Load from local folder
from peft import AutoPeftModelForCausalLM
model_dir = PROJECT_NAME
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoPeftModelForCausalLM.from_pretrained(model_dir)  # re-attaches the adapter to its base model

# Test Fine-tuned Model
hf_device = device_str if device_str in ("cuda", "xpu") else -1  # pipeline accepts a device string, or -1 for CPU
gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=hf_device,
    max_new_tokens=512,  # customize as desired
)

prompt = "請問台北今天的天氣如何?"  # "What's the weather like in Taipei today?"
output = gen(prompt, do_sample=True, temperature=0.8)
print(output[0]["generated_text"])
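
# A minimal sketch (assumption: generation should use the same chat template the
# model was fine-tuned on, rather than a raw string prompt) of chat-formatted inference:
chat = [{"role": "user", "content": prompt}]
chat_text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
chat_output = gen(chat_text, do_sample=True, temperature=0.8)
print(chat_output[0]["generated_text"])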