File size: 2,074 Bytes
67a3add |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
{
"type": "rewact",
"n_obs_steps": 1,
"normalization_mapping": {
"VISUAL": "MEAN_STD",
"STATE": "MEAN_STD",
"ACTION": "MEAN_STD"
},
"input_features": {
"observation.state": {
"type": "STATE",
"shape": [
6
]
},
"observation.images.top": {
"type": "VISUAL",
"shape": [
3,
480,
640
]
},
"observation.images.wrist": {
"type": "VISUAL",
"shape": [
3,
480,
640
]
}
},
"output_features": {
"action": {
"type": "ACTION",
"shape": [
6
]
}
},
"device": null,
"use_amp": false,
"push_to_hub": true,
"repo_id": "pravsels/so100_rewact_sam3",
"private": null,
"tags": null,
"license": null,
"chunk_size": 100,
"n_action_steps": 100,
"vision_backbone": "sam3_vit_l",
"pretrained_backbone_weights": "./sam3.pt",
"replace_final_stride_with_dilation": false,
"pre_norm": false,
"dim_model": 512,
"n_heads": 8,
"dim_feedforward": 3200,
"feedforward_activation": "relu",
"n_encoder_layers": 4,
"n_decoder_layers": 1,
"use_vae": true,
"latent_dim": 32,
"n_vae_encoder_layers": 4,
"temporal_ensemble_coeff": null,
"dropout": 0.1,
"kl_weight": 10.0,
"optimizer_lr": 5e-06,
"optimizer_weight_decay": 0.0001,
"optimizer_lr_backbone": 5e-06,
"use_reward_head": true,
"reward_loss_weight": 2.0,
"vision_encoder_type": "sam3",
"freeze_vision_encoder": true,
"dinov3": null,
"vjepa2": null,
"sam3": {
"variant": "vit_l",
"weights": "./sam3.pt",
"fpn_level": 2,
"pos_embed_type": "act_sinusoidal",
"patch_merge_stages": 2,
"use_patch_merge": false,
"input_resolution": 1008,
"compile_backbone": false
}
} |