model_name: "google/gemma-2-2b-it"
new_model_name: "gemma-2-2b-ft"

# LoRA parameters
lora_r: 64
lora_alpha: 16
lora_dropout: 0.1

# bitsandbytes parameters
use_4bit: True
bnb_4bit_compute_dtype: "float16"
bnb_4bit_quant_type: "nf4"
use_nested_quant: False

# Training arguments
num_train_epochs: 1
fp16: False
bf16: False
per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 2
gradient_checkpointing: True
eval_strategy: "steps"
eval_steps: 0.2
max_grad_norm: 0.3
learning_rate: 2e-4
weight_decay: 0.001
optimizer: "paged_adamw_32bit"
lr_scheduler_type: "constant"
max_steps: -1
warmup_steps: 5
group_by_length: True
save_steps: 50
logging_steps: 50
logging_strategy: "steps"

# SFT arguments
max_seq_length: 128
packing: True
device_map: "auto"
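For reference, the sketch below shows one way such a YAML file could be wired into a QLoRA fine-tuning run with `transformers`, `peft`, and `trl`. It is a minimal sketch under a few assumptions not stated in the config: the file is saved as `config.yaml`, the dataset is only a placeholder, the output directory reuses `new_model_name`, and some keyword names (`tokenizer` vs. `processing_class` on `SFTTrainer`, `max_seq_length` vs. `max_length` on `SFTConfig`) vary across trl releases.

```python
import torch
import yaml
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer

# Read the YAML shown above; "config.yaml" is an assumed filename.
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# bitsandbytes 4-bit quantization settings.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["use_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],
    bnb_4bit_compute_dtype=getattr(torch, cfg["bnb_4bit_compute_dtype"]),
    bnb_4bit_use_double_quant=cfg["use_nested_quant"],
)

# Load the quantized base model and its tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    cfg["model_name"],
    quantization_config=bnb_config,
    device_map=cfg["device_map"],
)
tokenizer = AutoTokenizer.from_pretrained(cfg["model_name"])

# LoRA adapter settings; target modules fall back to peft's defaults for the architecture.
peft_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Training / SFT settings. float() guards against PyYAML reading "2e-4" as a string.
training_args = SFTConfig(
    output_dir=cfg["new_model_name"],
    num_train_epochs=cfg["num_train_epochs"],
    fp16=cfg["fp16"],
    bf16=cfg["bf16"],
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    per_device_eval_batch_size=cfg["per_device_eval_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    gradient_checkpointing=cfg["gradient_checkpointing"],
    eval_strategy=cfg["eval_strategy"],
    eval_steps=cfg["eval_steps"],
    max_grad_norm=cfg["max_grad_norm"],
    learning_rate=float(cfg["learning_rate"]),
    weight_decay=cfg["weight_decay"],
    optim=cfg["optimizer"],
    lr_scheduler_type=cfg["lr_scheduler_type"],
    max_steps=cfg["max_steps"],
    warmup_steps=cfg["warmup_steps"],
    group_by_length=cfg["group_by_length"],
    save_steps=cfg["save_steps"],
    logging_steps=cfg["logging_steps"],
    logging_strategy=cfg["logging_strategy"],
    max_seq_length=cfg["max_seq_length"],
    packing=cfg["packing"],
)

# Placeholder dataset with a "text" column; substitute the dataset you actually train on.
dataset = load_dataset("timdettmers/openassistant-guanaco")

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    tokenizer=tokenizer,
)
trainer.train()
trainer.model.save_pretrained(cfg["new_model_name"])
```

With `eval_strategy: "steps"` an evaluation split is required, which is why the sketch passes both `train_dataset` and `eval_dataset`; `eval_steps: 0.2` is interpreted as a fraction of the total training steps.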