Adjustment to avoid OOM error
train_with_unsloth.py (+8, -4)
@@ -20,6 +20,7 @@ from datasets import load_dataset
 import os
 from transformers.integrations import WandbCallback
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch

 PROJECT_NAME='SmolLM2-135M-Instruct-TaiwanChat'
 BASE_MODEL_ID="unsloth/SmolLM2-135M-Instruct"

@@ -27,6 +28,8 @@ DATASET_ID="yentinglin/TaiwanChat"
 N_SAMPLES=80000
 MAX_LEN=2048

+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:128"
+
 # Tell wandb which project to use, and that you want to log your model
 os.environ["WANDB_PROJECT"] = f"{PROJECT_NAME}_CLOUD"
 os.environ["WANDB_LOG_MODEL"] = "end"
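
Note on the allocator setting: PYTORCH_CUDA_ALLOC_CONF configures PyTorch's caching CUDA allocator. expandable_segments:True lets the allocator grow memory segments on demand instead of carving fixed-size ones, and max_split_size_mb:128 keeps it from splitting blocks larger than 128 MB; both reduce fragmentation, a common source of "CUDA out of memory" errors even when free memory is reported. The variable is read when CUDA is first initialized, which happens lazily, so setting it after import torch but before the first CUDA allocation (as this commit does) should still take effect. A minimal standalone sketch, not part of the commit, for checking allocator state:

import os
# Must be set before the first CUDA allocation; CUDA initializes lazily.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:128"

import torch

if torch.cuda.is_available():
    x = torch.randn(1024, 1024, device="cuda")  # triggers CUDA init and one allocation
    print(f"allocated: {torch.cuda.memory_allocated() / 2**20:.1f} MiB")
    print(f"reserved:  {torch.cuda.memory_reserved() / 2**20:.1f} MiB")
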
@@ -73,15 +76,15 @@ dataset = dataset.map(fmt, batched=True, remove_columns=["messages"])
 new_dataset = dataset.train_test_split(test_size = 0.01)

 training_args = SFTConfig(
-    fp16_full_eval = …
-    per_device_eval_batch_size = …
+    fp16_full_eval = False,
+    per_device_eval_batch_size = 1,
     eval_accumulation_steps = 4,
-    eval_strategy = "…"
+    eval_strategy = "epoch",
     eval_steps = 1,
     dataset_text_field="text",
     output_dir=PROJECT_NAME,
     max_seq_length = MAX_LEN,
-    per_device_train_batch_size = …
+    per_device_train_batch_size = 1,
     gradient_accumulation_steps = 4,
     warmup_steps = 10,
     max_steps = 60,
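
Note on the batch settings: per_device_train_batch_size = 1 shrinks per-step activation memory, while gradient_accumulation_steps = 4 keeps the effective batch the optimizer sees at 1 × 4 = 4 samples per update. On the eval side, per_device_eval_batch_size = 1 plus eval_accumulation_steps = 4 moves accumulated predictions to the CPU every 4 eval steps instead of holding them all on the GPU, and eval_strategy = "epoch" runs evaluation once per epoch (with that strategy, the eval_steps value is not used). A back-of-the-envelope sketch of the arithmetic, with variable names mirroring the config above:

per_device_train_batch_size = 1
gradient_accumulation_steps = 4
max_len = 2048  # MAX_LEN above

# Activation memory scales with the per-device batch; the optimizer still
# sees batch * accumulation samples per weight update.
effective_batch = per_device_train_batch_size * gradient_accumulation_steps
print(effective_batch)            # 4 samples per optimizer update
print(effective_batch * max_len)  # up to 8192 tokens per update
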
@@ -106,6 +109,7 @@ trainer = SFTTrainer(
     train_dataset = new_dataset["train"],
     eval_dataset = new_dataset["test"],
 )
+torch.cuda.empty_cache()
 trainer = train_on_responses_only(
     trainer,
     instruction_part = "<|im_start|>user\n",
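
Note on torch.cuda.empty_cache(): it returns cached, currently unused blocks from PyTorch's allocator to the driver; it cannot free tensors that are still referenced. Calling it between building the SFTTrainer and wrapping it with train_on_responses_only presumably releases cache built up during model and trainer construction before training starts. A small standalone illustration, not from the commit:

import torch

x = torch.randn(4096, 4096, device="cuda")
del x                                # memory returns to PyTorch's cache, not the driver
print(torch.cuda.memory_reserved())  # cache still held by the process
torch.cuda.empty_cache()             # hand unused cached blocks back to the driver
print(torch.cuda.memory_reserved())  # drops once the cache is emptied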