adricl's picture
Trained model with the 14gb/2 dataset
4ec02d1
# Training configuration (first run): 50k steps, eval every 2000 steps,
# checkpoint every 1000 steps, cosine LR schedule with 8% warmup.
# Fix: the closing ')' of this call was missing in the pasted snippet.
training_config = TrainingArguments(
    # Positional args presumably map to: output_dir, overwrite_output_dir,
    # do_train, do_eval, do_predict, eval_strategy — TODO confirm against the
    # installed transformers version's TrainingArguments signature.
    model_dir_str, False, True, True, False, "steps",
    per_device_train_batch_size=64,  # 76% GPU util @ 24 and @ 32 batch size; trying 64 this run
    per_device_eval_batch_size=64,   # was 24, then 32
    gradient_accumulation_steps=3,   # TODO: try 4
    eval_accumulation_steps=None,
    eval_steps=2000,
    learning_rate=1e-4,
    weight_decay=0.01,
    max_grad_norm=1.0,
    max_steps=50000,
    lr_scheduler_type="cosine",
    warmup_ratio=0.08,
    log_level="debug",
    logging_strategy="steps",
    logging_steps=20,
    save_strategy="steps",
    save_steps=1000,
    save_total_limit=5,
    no_cuda=not USE_CUDA,     # USE_CUDA / FP16 / BF16 flags are defined elsewhere in the file
    seed=444,
    fp16=FP16,
    fp16_full_eval=FP16_EVAL,
    bf16=BF16,
    bf16_full_eval=BF16_EVAL,
    load_best_model_at_end=True,
    label_smoothing_factor=0.,
    optim="adamw_torch",
    report_to=["tensorboard"],
    gradient_checkpointing=True,
    dataloader_num_workers=8,    # added to fix thrashing issue: GPU not having enough data to process
    dataloader_pin_memory=True,  # original note: "we want the dataset in memory"
    torch_compile=True,          # added to speed up
)
Better version, suggested by AI
# Second ("AI-suggested") training configuration. Versus the first config it:
# shortens the run to 30k steps, evaluates/saves every 3000 steps with a
# 6000-step eval delay, logs every 100 steps, adds label smoothing (0.05),
# and disables gradient checkpointing.
# NOTE(review): the closing ')' of this call is not visible in this snippet.
training_config = TrainingArguments(
# Positional args presumably map to: output_dir, overwrite_output_dir,
# do_train, do_eval, do_predict, eval_strategy — TODO confirm against the
# installed transformers version's TrainingArguments signature.
model_dir_str, False, True, True, False, "steps",
per_device_train_batch_size=64, # 76% GPU util @ 24 and @ 32 batch size; trying 64 this run
per_device_eval_batch_size=64, # was 24, now 32
gradient_accumulation_steps=3, # TODO: change this to 4
eval_accumulation_steps=None,
eval_steps=3000,
eval_delay=6000, # skip evaluation for the first 6000 steps
learning_rate=1e-4,
weight_decay=0.01,
max_grad_norm=1.0,
max_steps=30000,
lr_scheduler_type="cosine",
warmup_ratio=0.08,
log_level="debug",
logging_strategy="steps",
logging_steps=100,
save_strategy="steps",
save_steps=3000,
save_total_limit=5,
no_cuda=not USE_CUDA, # USE_CUDA / FP16 / BF16 flags are defined elsewhere in the file
seed=444,
fp16=FP16,
fp16_full_eval=FP16_EVAL,
bf16=BF16,
bf16_full_eval=BF16_EVAL,
load_best_model_at_end=True,
label_smoothing_factor=0.05, # was 0. in the first config
optim="adamw_torch",
report_to=["tensorboard"],
gradient_checkpointing=False, # enabled in the first config, disabled here
dataloader_num_workers=8, # added to fix thrashing issue: GPU not having enough data to process
dataloader_pin_memory=True, # original note: "we want the dataset in memory"
torch_compile=True # added to speed up