```yaml
model_name: molmo
llm:
  d_model: 3584
  n_heads: 28
  n_kv_heads: 4
  head_dim: null
  qkv_bias: true
  clip_qkv: null
  n_layers: 28
  mlp_ratio: 4
  mlp_hidden_size: 37888
  activation_type: swiglu
  block_type: sequential
  rope: true
  rope_full_precision: true
  rope_theta: 1000000.0
  rope_type: default
  rope_factor: null
  rope_high_freq_factor: null
  rope_low_freq_factor: null
  rope_original_max_position_embeddings: null
  attention_type: sdpa
  float32_attention: true
  attention_dropout: 0.0
  attention_layer_norm: false
  attention_layer_norm_type: olmo
  residual_dropout: 0.1
  response_residual_dropout: 0.0
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-06
  attention_layer_norm_with_affine: true
  max_sequence_length: 4096
  max_position_embeddings: null
  include_bias: false
  bias_for_layer_norm: null
  norm_after: false
  moe_num_experts: 8
  moe_top_k: 2
  moe_mlp_impl: sparse
  moe_log_expert_assignment: false
  moe_shared_expert: false
  moe_lbl_in_fp32: false
  moe_interleave: false
  moe_loss_weight: 0.1
  moe_zloss_weight: null
  moe_dropless: true
  moe_capacity_factor: 1.25
  embedding_dropout: 0.0
  scale_logits: false
  vocab_size: 152064
  additional_vocab_size: 128
  weight_tying: false
  embedding_size: 152064
  use_position_ids: true
  tokenizer:
    identifier: Qwen/Qwen2.5-7B
    tokenizer_dir: null
    depth_tokens: true
  init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt
  init_incremental: null
  new_embedding_init_range: 0.02
  initializer_range: 0.02
  normalize_input_embeds: false
  activation_checkpoint: whole_layer
  compile: blocks
  fix_pad_tokenizer: false
  resize_vocab: false
  init_std: 0.02
  init_fn: normal
  init_cutoff_factor: null
```
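A few shape relationships implied by the `llm` block are worth spelling out: `head_dim: null` falls back to `d_model / n_heads`, `n_kv_heads: 4` makes this grouped-query attention, and `mlp_hidden_size: 37888` overrides `mlp_ratio` and reads as twice the per-branch SwiGLU width (2 × 18944, matching Qwen2.5-7B's FFN size). The `moe_*` entries look like inert defaults here, given `block_type: sequential`. A quick arithmetic check (plain Python, not tied to any particular implementation):

```python
# Sanity checks on the llm section above; plain arithmetic, no framework needed.
d_model, n_heads, n_kv_heads = 3584, 28, 4

head_dim = d_model // n_heads            # head_dim: null -> derived: 128
q_heads_per_kv = n_heads // n_kv_heads   # grouped-query attention: 7 query heads share each KV head
kv_proj_width = n_kv_heads * head_dim    # 512-dim K and V projections

mlp_hidden_size = 37888
per_branch = mlp_hidden_size // 2        # SwiGLU gate/up branches: 18944 each

assert head_dim == 128 and q_heads_per_kv == 7 and kv_proj_width == 512
assert per_branch == 18944
```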
```yaml
vision_backbone:
  vit:
    image_model_type: siglip
    image_default_input_size:
    - 378
    - 378
    image_patch_size: 14
    image_pos_patch_size: 14
    image_emb_dim: 1152
    image_num_heads: 16
    image_num_key_value_heads: 16
    image_num_layers: 27
    image_head_dim: 72
    image_mlp_dim: 4304
    image_mlp_activations: gelu_pytorch_tanh
    image_dropout_rate: 0.0
    image_num_pos: 729
    image_norm_eps: 1.0e-06
    attention_dropout: 0.0
    residual_dropout: 0.0
    initializer_range: 0.02
    float32_attention: true
    attention_type: sdpa
    activation_checkpointing: true
    init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
    resize_mode: siglip
    pad_value: 0.0
    normalize: siglip
  image_pooling_2d: attention_meanq
  pooling_attention_mask: false
  image_projector: mlp
  image_padding_embed: null
  vit_layers:
  - -3
  - -9
  skip_unused_layers: true
  image_feature_dropout: 0.0
  connector_activation_checkpointing: true
  compile_vit: blocks
```
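The ViT numbers are internally consistent: a 378×378 input at 14-px patches gives a 27×27 grid, which is exactly the 729 of `image_num_pos`, and 16 heads × 72 dims recovers `image_emb_dim: 1152`. `vit_layers: [-3, -9]` selects features from two late encoder layers; in Molmo-style backbones these are combined before pooling, though that detail lives in the model code, not this config. A quick check:

```python
# Consistency checks on the vision_backbone.vit section above.
input_side, patch = 378, 14
grid = input_side // patch            # 27 patches per side
assert grid * grid == 729             # == image_num_pos

heads, head_dim, emb_dim = 16, 72, 1152
assert heads * head_dim == emb_dim    # attention width matches image_emb_dim
```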
```yaml
data_formatter:
  prompt_templates: uber_model
  message_format: role
  system_prompt: demo_or_style
  always_start_with_space: false
  default_inference_len: 65
  select_answer: best
  debug: false
  image_last: false
  format_message_list: null
  p_one_message: 0.0
mm_preprocessor:
  crop_mode: overlap-and-resize-c2
  max_crops: 8
  max_images: 2
  max_multi_image_crops: 8
  pooling_w: 2
  pooling_h: 2
  overlap_margins:
  - 4
  - 4
  use_col_tokens: true
  loss_token_weighting: root_subsegments
  legacy_image_mask: false
```
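As a rough guide to the visual-token budget: each crop yields a 27×27 patch grid that is pooled `pooling_w × pooling_h = 2×2` before the connector, and `overlap_margins: [4, 4]` means adjacent crops share a 4-patch border per side (assuming Molmo-style semantics, where margins are measured in ViT patches; the exact tiling logic lives in the preprocessor code). A back-of-envelope sketch under those assumptions:

```python
# Back-of-envelope token budget for overlap-and-resize-c2 (assumes Molmo-style
# semantics: margins are in units of ViT patches and overlap between crops).
grid = 378 // 14                            # 27 patches per crop side
left, right = 4, 4                          # overlap_margins
unique = grid - left - right                # 19 non-overlapping patches per interior crop side

pooled_side = grid // 2                     # 2x2 pooling -> 13 pooled positions per side
tokens_per_crop = pooled_side * pooled_side # 169 visual tokens per crop (before column tokens)
max_crops = 8
print(unique, tokens_per_crop, max_crops * tokens_per_crop)  # 19 169 1352
```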
```yaml
max_answer_len: null
img_aug: false
bi_directional_attn: null
lora_enable: false
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.05
lora_bias: none
n_action_bins: 256
```
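`lora_enable: false`, so the LoRA fields are inert in this run; they document the defaults that would apply if adapters were switched on, with the update scaled by `lora_alpha / lora_rank = 16 / 64 = 0.25`. For reference, a minimal sketch of a LoRA linear layer in the usual parameterization (`LoRALinear` is illustrative, not a class from this codebase):

```python
import torch.nn as nn

class LoRALinear(nn.Module):
    """Minimal LoRA wrapper in the standard parameterization (a sketch,
    not the training code behind this config; lora_enable is false here)."""
    def __init__(self, base: nn.Linear, rank: int = 64, alpha: int = 16, dropout: float = 0.05):
        super().__init__()
        self.base = base
        self.lora_a = nn.Linear(base.in_features, rank, bias=False)
        self.lora_b = nn.Linear(rank, base.out_features, bias=False)
        nn.init.zeros_(self.lora_b.weight)   # adapter starts as a no-op
        self.dropout = nn.Dropout(dropout)
        self.scaling = alpha / rank          # 16 / 64 = 0.25

    def forward(self, x):
        return self.base(x) + self.scaling * self.lora_b(self.lora_a(self.dropout(x)))

layer = LoRALinear(nn.Linear(3584, 3584, bias=False))  # usage sketch
```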
```yaml
norm_stats:
  molmoact:
    action:
      mean:
      - 0.0005706787342205644
      - 0.0002448957529850304
      - -3.5987635783385485e-05
      - 0.00021597897284664214
      - -0.0004896928439848125
      - -0.000241481073317118
      - 0.5570635199546814
      std:
      - 0.005207270849496126
      - 0.007506529800593853
      - 0.006415561307221651
      - 0.013248044066131115
      - 0.010928540490567684
      - 0.014873150736093521
      - 0.49715080857276917
      min:
      - -0.07434078305959702
      - -0.07339745759963989
      - -0.06539416313171387
      - -0.1688285619020462
      - -0.10289879888296127
      - -0.2667275667190552
      - 0.0
      max:
      - 0.06042003631591797
      - 0.09417290985584259
      - 0.07019275426864624
      - 0.2616892158985138
      - 0.11751057207584381
      - 0.16968433558940887
      - 1.0
      q01:
      - -0.01538565568625927
      - -0.021047022193670273
      - -0.01688069850206375
      - -0.044314172118902206
      - -0.03890235349535942
      - -0.04788423702120781
      - 0.0
      q99:
      - 0.014661382883787155
      - 0.026515591889619827
      - 0.021398313343524933
      - 0.04216696694493294
      - 0.03401297703385353
      - 0.04957397282123566
      - 1.0
    num_entries: 1560068
```
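`n_action_bins: 256` together with these per-dimension statistics suggests the usual VLA tokenization recipe: clip each action dimension to its [q01, q99] range, rescale to the unit interval, and discretize into 256 uniform bins (the seventh dimension, the gripper, already lives in [0, 1]). A sketch of that mapping under those assumptions; the exact scheme used by this config's training code may differ:

```python
import numpy as np

# Hypothetical use of the molmoact norm_stats: clip to the 1st/99th percentile
# bounds, rescale to [0, 1], and discretize into n_action_bins uniform bins.
q01 = np.array([-0.01538566, -0.02104702, -0.01688070, -0.04431417,
                -0.03890235, -0.04788424, 0.0])
q99 = np.array([0.01466138, 0.02651559, 0.02139831, 0.04216697,
                0.03401298, 0.04957397, 1.0])
n_action_bins = 256

def discretize(action: np.ndarray) -> np.ndarray:
    unit = (np.clip(action, q01, q99) - q01) / (q99 - q01)
    return np.minimum((unit * n_action_bins).astype(int), n_action_bins - 1)

def undiscretize(bins: np.ndarray) -> np.ndarray:
    return q01 + (bins + 0.5) / n_action_bins * (q99 - q01)  # bin centers

a = np.array([0.001, -0.002, 0.0005, 0.01, -0.01, 0.02, 1.0])
bins = discretize(a)
print(bins)
print(undiscretize(bins) - a)  # round-trip error is at most half a bin width
```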