{
  "modality_dims": {
    "clip_l": 768,
    "clip_g": 1280,
    "t5_xl_l": 2048,
    "t5_xl_g": 2048
  },
  "modality_seq_lens": {
    "clip_l": 77,
    "clip_g": 77,
    "t5_xl_l": 512,
    "t5_xl_g": 512
  },
  "binding_config": {
    "clip_l": {
      "t5_xl_l": 0.3
    },
    "clip_g": {
      "t5_xl_g": 0.3
    },
    "t5_xl_l": {},
    "t5_xl_g": {}
  },
  "latent_dim": 2048,
  "seq_len": 77,
  "encoder_layers": 3,
  "decoder_layers": 3,
  "hidden_dim": 1024,
  "dropout": 0.1,
  "fusion_strategy": "adaptive_cantor",
  "fusion_heads": 8,
  "fusion_dropout": 0.1,
  "cantor_depth": 8,
  "cantor_local_window": 3,
  "alpha_init": 1.0,
  "beta_init": 0.3,
  "alpha_lr_scale": 0.1,
  "beta_lr_scale": 1.0,
  "beta_kl": 0.1,
  "beta_reconstruction": 1.0,
  "beta_cross_modal": 0.05,
  "beta_alpha_regularization": 0.01,
  "recon_type": "mse",
  "modality_recon_weights": {
    "clip_l": 1.0,
    "clip_g": 1.0,
    "t5_xl_l": 0.3,
    "t5_xl_g": 0.3
  },
  "use_kl_annealing": true,
  "kl_anneal_epochs": 10,
  "kl_start_beta": 0.0,
  "batch_size": 8,
  "num_epochs": 100,
  "learning_rate": 0.0001,
  "weight_decay": 1e-05,
  "gradient_clip": 1.0,
  "use_scheduler": true,
  "scheduler_type": "cosine",
  "num_samples": 10000,
  "synthetic_ratio": 0.15,
  "checkpoint_dir": "./checkpoints_lyra_adaptive_cantor",
  "save_every": 1000,
  "keep_last_n": 3,
  "hf_repo": "AbstractPhil/vae-lyra-xl-adaptive-cantor",
  "push_to_hub": true,
  "push_every": 2000,
  "auto_load_from_hub": true,
  "use_wandb": false,
  "wandb_project": "vae-lyra-adaptive-cantor",
  "wandb_entity": null,
  "log_every": 50,
  "device": "cuda",
  "mixed_precision": true,
  "seed": 42,
  "num_workers": 0
}