# Original training configuration (superseded by the revised config below).
# NOTE(review): the six leading positional arguments are assumed to map to
# (output_dir, overwrite_output_dir, do_train, do_eval, do_predict,
# evaluation_strategy) in transformers' TrainingArguments — confirm against
# the installed transformers version; they are spelled out as keywords here
# for readability.
training_config = TrainingArguments(
    model_dir_str,                   # output_dir
    overwrite_output_dir=False,
    do_train=True,
    do_eval=True,
    do_predict=False,
    evaluation_strategy="steps",
    per_device_train_batch_size=64,  # ~76% GPU util @ 24 and @ 32; trying 64
    per_device_eval_batch_size=64,   # was 24, then 32
    gradient_accumulation_steps=3,   # TODO: try 4
    eval_accumulation_steps=None,
    eval_steps=2000,
    learning_rate=1e-4,
    weight_decay=0.01,
    max_grad_norm=1.0,
    max_steps=50000,
    lr_scheduler_type="cosine",
    warmup_ratio=0.08,
    log_level="debug",
    logging_strategy="steps",
    logging_steps=20,
    save_strategy="steps",
    # Was 1000: load_best_model_at_end=True requires save_steps to be a
    # round multiple of eval_steps (HF raises otherwise), so align with
    # eval_steps=2000.
    save_steps=2000,
    save_total_limit=5,
    no_cuda=not USE_CUDA,
    seed=444,
    fp16=FP16,
    fp16_full_eval=FP16_EVAL,
    bf16=BF16,
    bf16_full_eval=BF16_EVAL,
    load_best_model_at_end=True,
    label_smoothing_factor=0.0,
    optim="adamw_torch",
    report_to=["tensorboard"],
    gradient_checkpointing=True,     # trade compute for memory
    dataloader_num_workers=8,        # keep the GPU fed (avoid input starvation)
    dataloader_pin_memory=True,      # faster host->device transfers
    torch_compile=True,              # speed up via torch.compile
)
# Better configuration suggested by AI (revision of the config above):
# Revised training configuration — changes vs. the original above:
# fewer total steps (30k vs 50k), eval/save every 3000 steps, first eval
# delayed to step 6000, coarser logging, label smoothing enabled, and
# gradient checkpointing disabled (speed over memory).
# NOTE(review): positional arguments spelled out as keywords; assumed
# mapping is (output_dir, overwrite_output_dir, do_train, do_eval,
# do_predict, evaluation_strategy) — confirm against the installed
# transformers version.
training_config = TrainingArguments(
    model_dir_str,                   # output_dir
    overwrite_output_dir=False,
    do_train=True,
    do_eval=True,
    do_predict=False,
    evaluation_strategy="steps",
    per_device_train_batch_size=64,  # ~76% GPU util @ 24 and @ 32; trying 64
    per_device_eval_batch_size=64,   # was 24, then 32
    gradient_accumulation_steps=3,   # TODO: try 4
    eval_accumulation_steps=None,
    eval_steps=3000,
    eval_delay=6000,                 # skip evals before step 6000
    learning_rate=1e-4,
    weight_decay=0.01,
    max_grad_norm=1.0,
    max_steps=30000,
    lr_scheduler_type="cosine",
    warmup_ratio=0.08,
    log_level="debug",
    logging_strategy="steps",
    logging_steps=100,
    save_strategy="steps",
    save_steps=3000,                 # multiple of eval_steps, as required by
                                     # load_best_model_at_end
    save_total_limit=5,
    no_cuda=not USE_CUDA,
    seed=444,
    fp16=FP16,
    fp16_full_eval=FP16_EVAL,
    bf16=BF16,
    bf16_full_eval=BF16_EVAL,
    load_best_model_at_end=True,
    label_smoothing_factor=0.05,
    optim="adamw_torch",
    report_to=["tensorboard"],
    gradient_checkpointing=False,    # disabled for throughput
    dataloader_num_workers=8,        # keep the GPU fed (avoid input starvation)
    dataloader_pin_memory=True,      # faster host->device transfers
    torch_compile=True,              # speed up via torch.compile
)