#!/usr/bin/env python3
"""
Fine-tune “SmolLM2-135M-Instruct” on the TaiwanChat dataset using Unsloth’s 4-bit quantization
+ LoRA adapters, with evaluation on a 1% hold-out every step, and push the merged model to Hugging Face.
Steps:
1. Load a 4-bit quantized base model via Unsloth’s FastLanguageModel.
2. Attach LoRA adapters (r=16) and enable gradient checkpointing for memory savings.
3. Load TaiwanChat, render ChatML, and split 99/1 train/validation.
4. Configure SFTTrainer to mask user prompts (train_on_responses_only), run eval every step, log to W&B.
5. Train for up to 60 steps.
6. Merge base+LoRA weights into 16-bit safetensors and push to Hugging Face with `push_to_hub_merged`.
"""
from unsloth import FastLanguageModel
from unsloth.chat_templates import train_on_responses_only
from trl import SFTTrainer, SFTConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    pipeline,
)
from datasets import load_dataset
import os
import torch
PROJECT_NAME = "SmolLM2-135M-Instruct-TaiwanChat"
BASE_MODEL_ID = "unsloth/SmolLM2-135M-Instruct"
DATASET_ID = "yentinglin/TaiwanChat"
N_SAMPLES = 80000
MAX_LEN = 2048
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:128"
# Tell wandb which project to use, and that you want to log your model
os.environ["WANDB_PROJECT"] = f"{PROJECT_NAME}_CLOUD"
os.environ["WANDB_LOG_MODEL"] = "end"
## Load with Unsloth's optimized API
# 1) Load quantized model
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = BASE_MODEL_ID,
max_seq_length = MAX_LEN,
load_in_4bit = True,
    full_finetuning = False, # we will add LoRA adapters next
)
# 2) Attach LoRA adapters (also prepares the quantized model for k-bit training:
#    layer norms, disabled caching, etc.)
model = FastLanguageModel.get_peft_model(
model,
r = 16,
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = 16,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
max_seq_length = MAX_LEN,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
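# Optional sanity check: with r=16 on a 135M-parameter model, only a small
# fraction of parameters (the adapter matrices) should be trainable.
model.print_trainable_parameters()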
# Prepare the TaiwanChat Dataset
# 1) Load & split
dataset = load_dataset(DATASET_ID, split=f"train[:{N_SAMPLES}]")
# Turn each list of messages into a single "text" string per example, using the
# tokenizer's chat template (ChatML for SmolLM2).
def fmt(examples):
    texts = [
        # add_generation_prompt must be False for training: each example already
        # contains the assistant replies, so appending an empty assistant turn
        # at the end would corrupt the targets.
        tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=False)
        for msgs in examples["messages"]
    ]
    return {"text": texts}
dataset = dataset.map(fmt, batched=True, remove_columns=["messages"])
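# Optional sanity check: inspect one rendered example to confirm the ChatML
# markers (<|im_start|>user / <|im_start|>assistant) that train_on_responses_only
# relies on later are actually present in the text.
print(dataset[0]["text"][:500])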
# 99/1 train/validation split (seeded so the hold-out is reproducible)
new_dataset = dataset.train_test_split(test_size = 0.01, seed = 3407)
training_args = SFTConfig(
    fp16_full_eval = False,
    per_device_eval_batch_size = 1,
    eval_accumulation_steps = 4,
    eval_strategy = "steps", # evaluate every `eval_steps` steps ("epoch" would ignore eval_steps)
    eval_steps = 1,
    dataset_text_field = "text",
    output_dir = PROJECT_NAME,
    max_seq_length = MAX_LEN,
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 4,
    warmup_steps = 10,
    max_steps = 60,
    logging_steps = 1,
    optim = "adamw_8bit",
    seed = 3407,
    # ─── W&B integration ───
    logging_dir = f"{PROJECT_NAME}/logs", # where TensorBoard-style logs are stored
    report_to = ["wandb"],                # enable W&B reporting
    run_name = f"{PROJECT_NAME}_CLOUD",   # name this run in your W&B project
    push_to_hub = True,
    # gradient_checkpointing is deliberately not set here: Unsloth's optimized
    # checkpointing is already enabled via use_gradient_checkpointing="unsloth".
)
# Training with Trainer
trainer = SFTTrainer(
    model = model,
    args = training_args,
    # DataCollatorForSeq2Seq pads the per-token "labels" that
    # train_on_responses_only writes into the dataset below.
    # (DataCollatorForLanguageModeling would rebuild labels from input_ids
    # and clobber the response-only masking.)
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    tokenizer = tokenizer,
    # report_to=["wandb"] already attaches the W&B callback, so it is not
    # passed again via `callbacks`.
    train_dataset = new_dataset["train"],
    eval_dataset = new_dataset["test"],
)
torch.cuda.empty_cache()
trainer = train_on_responses_only(
trainer,
instruction_part = "<|im_start|>user\n",
response_part = "<|im_start|>assistant\n",
)
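# Optional sanity check (adapted from Unsloth's notebooks; assumes the dataset
# rows now carry a "labels" column): prompt tokens should be masked with -100,
# so decoding the unmasked label ids should show only the assistant replies.
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if tok == -100 else tok for tok in trainer.train_dataset[0]["labels"]]))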
trainer.train()
model.push_to_hub_merged(
    f"Luigi/{PROJECT_NAME}",
    tokenizer,
    save_method = "merged_16bit", # merge LoRA into the base weights, upload as 16-bit safetensors
)
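# Free the 4-bit training model before reloading the merged copy below, so two
# models are not held in GPU memory at once.
del model, trainer
torch.cuda.empty_cache()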
# 1. load merged model + tokenizer from your HF repo
tokenizer = AutoTokenizer.from_pretrained(f'Luigi/{PROJECT_NAME}')
model = AutoModelForCausalLM.from_pretrained(f'Luigi/{PROJECT_NAME}')
# 2. run text-generation
gen = pipeline(
"text-generation", model=model, tokenizer=tokenizer,
device_map="auto", # or device=0 for a single GPU
)
prompt = "請問台北今天的天氣如何?"
print(gen(prompt, max_new_tokens=MAX_LEN)[0]["generated_text"]) |