{
  "adapter_path": "zen-nano/model",
  "alpha": [
    1e-05
  ],
  "batch_size": 2,
  "beta": 0.1,
  "config": null,
  "data": "zen-nano/data",
  "de_quantize": false,
  "delta": 50.0,
  "dpo_cpo_loss_type": "sigmoid",
  "epochs": null,
  "epsilon": 0.0001,
  "epsilon_high": null,
  "fuse": true,
  "grad_checkpoint": false,
  "gradient_accumulation_steps": 1,
  "group_size": 4,
  "grpo_loss_type": "grpo",
  "importance_sampling_level": null,
  "iters": 100,
  "judge": "mlx-community/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2-4-bit",
  "judge_config": {},
  "learning_rate": 1e-05,
  "list_reward_functions": false,
  "load_in_4bits": false,
  "load_in_6bits": false,
  "load_in_8bits": false,
  "lora_parameters": {
    "rank": 8,
    "dropout": 0.0,
    "scale": 10.0
  },
  "lr_schedule": null,
  "mask_prompt": false,
  "max_completion_length": 512,
  "max_seq_length": 2048,
  "model": "mlx-community/Qwen3-4B-Instruct-2507-4bit",
  "num_layers": 16,
  "optimizer": "adam",
  "optimizer_config": {
    "adam": {},
    "adamw": {},
    "muon": {},
    "qhadam": {}
  },
  "reference_model_path": null,
  "resume_adapter_file": null,
  "reward_functions": null,
  "reward_functions_file": null,
  "reward_scaling": 1.0,
  "reward_weights": null,
  "save_every": 100,
  "seed": 0,
  "steps_per_eval": 200,
  "steps_per_report": 10,
  "temperature": 1.0,
  "test": false,
  "test_batches": 500,
  "train": true,
  "train_mode": "sft",
  "train_type": "lora",
  "val_batches": 25,
  "wandb": null
}