{
  "architectures": [
    "Gemma3ForConditionalGeneration"
  ],
  "boi_token_index": 255999,
  "dtype": "bfloat16",
  "eoi_token_index": 256000,
  "eos_token_id": [
    1,
    106
  ],
  "image_token_index": 262144,
  "initializer_range": 0.02,
  "mm_tokens_per_image": 256,
  "model_type": "gemma3",
  "text_config": {
    "_sliding_window_pattern": 6,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attn_logit_softcapping": null,
    "final_logit_softcapping": null,
    "head_dim": 256,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 10240,
    "layer_types": [
      "sliding_attention",
      "sliding_attention"
    ],
    "max_position_embeddings": 131072,
    "model_type": "gemma3_text",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "query_pre_attn_scalar": 256,
    "rms_norm_eps": 1e-06,
    "rope_local_base_freq": 10000.0,
    "rope_scaling": {
      "factor": 8.0,
      "rope_type": "linear"
    },
    "rope_theta": 1000000.0,
    "sliding_window": 1024,
    "use_bidirectional_attention": false,
    "use_cache": true,
    "vocab_size": 262208
  },
  "transformers_version": "4.57.0.dev0",
  "vision_config": {
    "attention_dropout": 0.0,
    "embed_dim": 64,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 16,
    "image_size": 896,
    "intermediate_size": 4304,
    "layer_norm_eps": 1e-06,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "vision_use_head": false
  }
}