| aa: rand-m7-inc1-mstd1.0-n3 | |
| amp: true | |
| amp_dtype: float16 | |
| amp_impl: native | |
| aug_repeats: 0.0 | |
| aug_splits: 0 | |
| batch_size: 256 | |
| bce_loss: false | |
| bce_pos_weight: null | |
| bce_sum: false | |
| bce_target_thresh: null | |
| bn_eps: null | |
| bn_momentum: null | |
| channels_last: false | |
| checkpoint_hist: 10 | |
| class_map: '' | |
| clip_grad: 5.0 | |
| clip_mode: norm | |
| color_jitter: null | |
| color_jitter_prob: null | |
| cooldown_epochs: 10 | |
| crop_pct: 0.95 | |
| cutmix: 0.8 | |
| cutmix_minmax: null | |
| data: null | |
| data_dir: /data/imagenet/ | |
| dataset: imagenet | |
| dataset_download: false | |
| decay_epochs: 100 | |
| decay_milestones: | |
| - 90 | |
| - 180 | |
| - 270 | |
| decay_rate: 0.1 | |
| device: cuda | |
| device_modules: null | |
| dist_bn: reduce | |
| drop: 0.0 | |
| drop_block: null | |
| drop_connect: null | |
| drop_path: 0.2 | |
| epoch_repeats: 0.0 | |
| epochs: 60 | |
| eval_metric: top1 | |
| experiment: '' | |
| fast_norm: false | |
| force_cpu: false | |
| fuser: '' | |
| gaussian_blur_prob: null | |
| gp: null | |
| grad_accum_steps: 1 | |
| grad_checkpointing: false | |
| grayscale_prob: null | |
| head_init_bias: null | |
| head_init_scale: 0.0 | |
| hflip: 0.5 | |
| img_size: 256 | |
| in_chans: null | |
| initial_checkpoint: '' | |
| input_img_mode: null | |
| input_key: null | |
| input_size: null | |
| interpolation: '' | |
| jsd_loss: false | |
| layer_decay: 0.75 | |
| local_rank: 0 | |
| log_interval: 50 | |
| log_wandb: false | |
| lr: 0.0001 | |
| lr_base: 0.0008 | |
| lr_base_scale: '' | |
| lr_base_size: 512 | |
| lr_cycle_decay: 0.5 | |
| lr_cycle_limit: 1 | |
| lr_cycle_mul: 1.0 | |
| lr_k_decay: 1.0 | |
| lr_noise: null | |
| lr_noise_pct: 0.67 | |
| lr_noise_std: 1.0 | |
| mean: null | |
| min_lr: 5.0e-07 | |
| mixup: 0.2 | |
| mixup_mode: batch | |
| mixup_off_epoch: 0 | |
| mixup_prob: 1.0 | |
| mixup_switch_prob: 0.5 | |
| model: vit_betwixt_patch16_reg4_gap_256 | |
| model_ema: true | |
| model_ema_decay: 0.999 | |
| model_ema_force_cpu: false | |
| model_ema_warmup: false | |
| model_kwargs: {} | |
| momentum: 0.9 | |
| no_aug: false | |
| no_ddp_bb: false | |
| no_prefetcher: false | |
| no_resume_opt: false | |
| num_classes: 1000 | |
| opt: adamw | |
| opt_betas: null | |
| opt_eps: 1.0e-08 | |
| opt_kwargs: {} | |
| output: '' | |
| patience_epochs: 10 | |
| pin_mem: true | |
| pretrained: true | |
| pretrained_path: vit_betw-in12k-8.pth | |
| ratio: | |
| - 0.75 | |
| - 1.3333333333333333 | |
| recount: 1 | |
| recovery_interval: 0 | |
| remode: pixel | |
| reprob: 0.2 | |
| resplit: false | |
| resume: '' | |
| save_images: false | |
| scale: | |
| - 0.08 | |
| - 1.0 | |
| sched: cosine | |
| sched_on_updates: true | |
| seed: 0 | |
| smoothing: 0.1 | |
| split_bn: false | |
| start_epoch: null | |
| std: null | |
| sync_bn: false | |
| synchronize_step: false | |
| target_key: null | |
| torchcompile: inductor | |
| torchscript: false | |
| train_crop_mode: null | |
| train_interpolation: random | |
| train_num_samples: null | |
| train_split: train | |
| tta: 0 | |
| use_multi_epochs_loader: false | |
| val_num_samples: null | |
| val_split: validation | |
| validation_batch_size: null | |
| vflip: 0.0 | |
| warmup_epochs: 20 | |
| warmup_lr: 5.0e-07 | |
| warmup_prefix: false | |
| weight_decay: 0.01 | |
| worker_seeding: all | |
| workers: 8 | |