File size: 1,676 Bytes
c315639 2379302 c315639 ca18b3a c315639 7af6956 c315639 ca18b3a c315639 4cb5a10 c315639 4cb5a10 c315639 ca18b3a c315639 ca18b3a 95f5cc5 c315639 ca18b3a c315639 95f5cc5 c315639 768b27d c315639 7af6956 ca18b3a c315639 4cb5a10 c315639 4cb5a10 ca18b3a c315639 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
{
"architectures": [
"Gemma3ForConditionalGeneration"
],
"boi_token_index": 255999,
"dtype": "bfloat16",
"eoi_token_index": 256000,
"eos_token_id": [
1,
106
],
"image_token_index": 262144,
"initializer_range": 0.02,
"mm_tokens_per_image": 256,
"model_type": "gemma3",
"text_config": {
"_sliding_window_pattern": 6,
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": null,
"final_logit_softcapping": null,
"head_dim": 256,
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 16,
"initializer_range": 0.02,
"intermediate_size": 10240,
"layer_types": [
"sliding_attention",
"sliding_attention"
],
"max_position_embeddings": 131072,
"model_type": "gemma3_text",
"num_attention_heads": 4,
"num_hidden_layers": 2,
"num_key_value_heads": 2,
"query_pre_attn_scalar": 256,
"rms_norm_eps": 1e-06,
"rope_local_base_freq": 10000.0,
"rope_scaling": {
"factor": 8.0,
"rope_type": "linear"
},
"rope_theta": 1000000.0,
"sliding_window": 1024,
"use_bidirectional_attention": false,
"use_cache": true,
"vocab_size": 262208
},
"transformers_version": "4.57.0.dev0",
"vision_config": {
"attention_dropout": 0.0,
"embed_dim": 64,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 16,
"image_size": 896,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 4,
"num_channels": 3,
"num_hidden_layers": 2,
"num_key_value_heads": 2,
"patch_size": 14,
"vision_use_head": false
}
}
|