philperceptron committed on
Commit 2687dea · 1 Parent(s): 10e921e

update config
added_tokens.json CHANGED
@@ -2,6 +2,273 @@
2
  "</think>": 151668,
3
  "</tool_call>": 151658,
4
  "</tool_response>": 151666,
5
+ "<reserved_0>": 151669,
6
+ "<reserved_100>": 151769,
7
+ "<reserved_101>": 151770,
8
+ "<reserved_102>": 151771,
9
+ "<reserved_103>": 151772,
10
+ "<reserved_104>": 151773,
11
+ "<reserved_105>": 151774,
12
+ "<reserved_106>": 151775,
13
+ "<reserved_107>": 151776,
14
+ "<reserved_108>": 151777,
15
+ "<reserved_109>": 151778,
16
+ "<reserved_10>": 151679,
17
+ "<reserved_110>": 151779,
18
+ "<reserved_111>": 151780,
19
+ "<reserved_112>": 151781,
20
+ "<reserved_113>": 151782,
21
+ "<reserved_114>": 151783,
22
+ "<reserved_115>": 151784,
23
+ "<reserved_116>": 151785,
24
+ "<reserved_117>": 151786,
25
+ "<reserved_118>": 151787,
26
+ "<reserved_119>": 151788,
27
+ "<reserved_11>": 151680,
28
+ "<reserved_120>": 151789,
29
+ "<reserved_121>": 151790,
30
+ "<reserved_122>": 151791,
31
+ "<reserved_123>": 151792,
32
+ "<reserved_124>": 151793,
33
+ "<reserved_125>": 151794,
34
+ "<reserved_126>": 151795,
35
+ "<reserved_127>": 151796,
36
+ "<reserved_128>": 151797,
37
+ "<reserved_129>": 151798,
38
+ "<reserved_12>": 151681,
39
+ "<reserved_130>": 151799,
40
+ "<reserved_131>": 151800,
41
+ "<reserved_132>": 151801,
42
+ "<reserved_133>": 151802,
43
+ "<reserved_134>": 151803,
44
+ "<reserved_135>": 151804,
45
+ "<reserved_136>": 151805,
46
+ "<reserved_137>": 151806,
47
+ "<reserved_138>": 151807,
48
+ "<reserved_139>": 151808,
49
+ "<reserved_13>": 151682,
50
+ "<reserved_140>": 151809,
51
+ "<reserved_141>": 151810,
52
+ "<reserved_142>": 151811,
53
+ "<reserved_143>": 151812,
54
+ "<reserved_144>": 151813,
55
+ "<reserved_145>": 151814,
56
+ "<reserved_146>": 151815,
57
+ "<reserved_147>": 151816,
58
+ "<reserved_148>": 151817,
59
+ "<reserved_149>": 151818,
60
+ "<reserved_14>": 151683,
61
+ "<reserved_150>": 151819,
62
+ "<reserved_151>": 151820,
63
+ "<reserved_152>": 151821,
64
+ "<reserved_153>": 151822,
65
+ "<reserved_154>": 151823,
66
+ "<reserved_155>": 151824,
67
+ "<reserved_156>": 151825,
68
+ "<reserved_157>": 151826,
69
+ "<reserved_158>": 151827,
70
+ "<reserved_159>": 151828,
71
+ "<reserved_15>": 151684,
72
+ "<reserved_160>": 151829,
73
+ "<reserved_161>": 151830,
74
+ "<reserved_162>": 151831,
75
+ "<reserved_163>": 151832,
76
+ "<reserved_164>": 151833,
77
+ "<reserved_165>": 151834,
78
+ "<reserved_166>": 151835,
79
+ "<reserved_167>": 151836,
80
+ "<reserved_168>": 151837,
81
+ "<reserved_169>": 151838,
82
+ "<reserved_16>": 151685,
83
+ "<reserved_170>": 151839,
84
+ "<reserved_171>": 151840,
85
+ "<reserved_172>": 151841,
86
+ "<reserved_173>": 151842,
87
+ "<reserved_174>": 151843,
88
+ "<reserved_175>": 151844,
89
+ "<reserved_176>": 151845,
90
+ "<reserved_177>": 151846,
91
+ "<reserved_178>": 151847,
92
+ "<reserved_179>": 151848,
93
+ "<reserved_17>": 151686,
94
+ "<reserved_180>": 151849,
95
+ "<reserved_181>": 151850,
96
+ "<reserved_182>": 151851,
97
+ "<reserved_183>": 151852,
98
+ "<reserved_184>": 151853,
99
+ "<reserved_185>": 151854,
100
+ "<reserved_186>": 151855,
101
+ "<reserved_187>": 151856,
102
+ "<reserved_188>": 151857,
103
+ "<reserved_189>": 151858,
104
+ "<reserved_18>": 151687,
105
+ "<reserved_190>": 151859,
106
+ "<reserved_191>": 151860,
107
+ "<reserved_192>": 151861,
108
+ "<reserved_193>": 151862,
109
+ "<reserved_194>": 151863,
110
+ "<reserved_195>": 151864,
111
+ "<reserved_196>": 151865,
112
+ "<reserved_197>": 151866,
113
+ "<reserved_198>": 151867,
114
+ "<reserved_199>": 151868,
115
+ "<reserved_19>": 151688,
116
+ "<reserved_1>": 151670,
117
+ "<reserved_200>": 151869,
118
+ "<reserved_201>": 151870,
119
+ "<reserved_202>": 151871,
120
+ "<reserved_203>": 151872,
121
+ "<reserved_204>": 151873,
122
+ "<reserved_205>": 151874,
123
+ "<reserved_206>": 151875,
124
+ "<reserved_207>": 151876,
125
+ "<reserved_208>": 151877,
126
+ "<reserved_209>": 151878,
127
+ "<reserved_20>": 151689,
128
+ "<reserved_210>": 151879,
129
+ "<reserved_211>": 151880,
130
+ "<reserved_212>": 151881,
131
+ "<reserved_213>": 151882,
132
+ "<reserved_214>": 151883,
133
+ "<reserved_215>": 151884,
134
+ "<reserved_216>": 151885,
135
+ "<reserved_217>": 151886,
136
+ "<reserved_218>": 151887,
137
+ "<reserved_219>": 151888,
138
+ "<reserved_21>": 151690,
139
+ "<reserved_220>": 151889,
140
+ "<reserved_221>": 151890,
141
+ "<reserved_222>": 151891,
142
+ "<reserved_223>": 151892,
143
+ "<reserved_224>": 151893,
144
+ "<reserved_225>": 151894,
145
+ "<reserved_226>": 151895,
146
+ "<reserved_227>": 151896,
147
+ "<reserved_228>": 151897,
148
+ "<reserved_229>": 151898,
149
+ "<reserved_22>": 151691,
150
+ "<reserved_230>": 151899,
151
+ "<reserved_231>": 151900,
152
+ "<reserved_232>": 151901,
153
+ "<reserved_233>": 151902,
154
+ "<reserved_234>": 151903,
155
+ "<reserved_235>": 151904,
156
+ "<reserved_236>": 151905,
157
+ "<reserved_237>": 151906,
158
+ "<reserved_238>": 151907,
159
+ "<reserved_239>": 151908,
160
+ "<reserved_23>": 151692,
161
+ "<reserved_240>": 151909,
162
+ "<reserved_241>": 151910,
163
+ "<reserved_242>": 151911,
164
+ "<reserved_243>": 151912,
165
+ "<reserved_244>": 151913,
166
+ "<reserved_245>": 151914,
167
+ "<reserved_246>": 151915,
168
+ "<reserved_247>": 151916,
169
+ "<reserved_248>": 151917,
170
+ "<reserved_249>": 151918,
171
+ "<reserved_24>": 151693,
172
+ "<reserved_250>": 151919,
173
+ "<reserved_251>": 151920,
174
+ "<reserved_252>": 151921,
175
+ "<reserved_253>": 151922,
176
+ "<reserved_254>": 151923,
177
+ "<reserved_255>": 151924,
178
+ "<reserved_256>": 151925,
179
+ "<reserved_257>": 151926,
180
+ "<reserved_258>": 151927,
181
+ "<reserved_259>": 151928,
182
+ "<reserved_25>": 151694,
183
+ "<reserved_260>": 151929,
184
+ "<reserved_261>": 151930,
185
+ "<reserved_262>": 151931,
186
+ "<reserved_263>": 151932,
187
+ "<reserved_264>": 151933,
188
+ "<reserved_265>": 151934,
189
+ "<reserved_266>": 151935,
190
+ "<reserved_26>": 151695,
191
+ "<reserved_27>": 151696,
192
+ "<reserved_28>": 151697,
193
+ "<reserved_29>": 151698,
194
+ "<reserved_2>": 151671,
195
+ "<reserved_30>": 151699,
196
+ "<reserved_31>": 151700,
197
+ "<reserved_32>": 151701,
198
+ "<reserved_33>": 151702,
199
+ "<reserved_34>": 151703,
200
+ "<reserved_35>": 151704,
201
+ "<reserved_36>": 151705,
202
+ "<reserved_37>": 151706,
203
+ "<reserved_38>": 151707,
204
+ "<reserved_39>": 151708,
205
+ "<reserved_3>": 151672,
206
+ "<reserved_40>": 151709,
207
+ "<reserved_41>": 151710,
208
+ "<reserved_42>": 151711,
209
+ "<reserved_43>": 151712,
210
+ "<reserved_44>": 151713,
211
+ "<reserved_45>": 151714,
212
+ "<reserved_46>": 151715,
213
+ "<reserved_47>": 151716,
214
+ "<reserved_48>": 151717,
215
+ "<reserved_49>": 151718,
216
+ "<reserved_4>": 151673,
217
+ "<reserved_50>": 151719,
218
+ "<reserved_51>": 151720,
219
+ "<reserved_52>": 151721,
220
+ "<reserved_53>": 151722,
221
+ "<reserved_54>": 151723,
222
+ "<reserved_55>": 151724,
223
+ "<reserved_56>": 151725,
224
+ "<reserved_57>": 151726,
225
+ "<reserved_58>": 151727,
226
+ "<reserved_59>": 151728,
227
+ "<reserved_5>": 151674,
228
+ "<reserved_60>": 151729,
229
+ "<reserved_61>": 151730,
230
+ "<reserved_62>": 151731,
231
+ "<reserved_63>": 151732,
232
+ "<reserved_64>": 151733,
233
+ "<reserved_65>": 151734,
234
+ "<reserved_66>": 151735,
235
+ "<reserved_67>": 151736,
236
+ "<reserved_68>": 151737,
237
+ "<reserved_69>": 151738,
238
+ "<reserved_6>": 151675,
239
+ "<reserved_70>": 151739,
240
+ "<reserved_71>": 151740,
241
+ "<reserved_72>": 151741,
242
+ "<reserved_73>": 151742,
243
+ "<reserved_74>": 151743,
244
+ "<reserved_75>": 151744,
245
+ "<reserved_76>": 151745,
246
+ "<reserved_77>": 151746,
247
+ "<reserved_78>": 151747,
248
+ "<reserved_79>": 151748,
249
+ "<reserved_7>": 151676,
250
+ "<reserved_80>": 151749,
251
+ "<reserved_81>": 151750,
252
+ "<reserved_82>": 151751,
253
+ "<reserved_83>": 151752,
254
+ "<reserved_84>": 151753,
255
+ "<reserved_85>": 151754,
256
+ "<reserved_86>": 151755,
257
+ "<reserved_87>": 151756,
258
+ "<reserved_88>": 151757,
259
+ "<reserved_89>": 151758,
260
+ "<reserved_8>": 151677,
261
+ "<reserved_90>": 151759,
262
+ "<reserved_91>": 151760,
263
+ "<reserved_92>": 151761,
264
+ "<reserved_93>": 151762,
265
+ "<reserved_94>": 151763,
266
+ "<reserved_95>": 151764,
267
+ "<reserved_96>": 151765,
268
+ "<reserved_97>": 151766,
269
+ "<reserved_98>": 151767,
270
+ "<reserved_99>": 151768,
271
+ "<reserved_9>": 151678,
272
  "<think>": 151667,
273
  "<tool_call>": 151657,
274
  "<tool_response>": 151665,
config.json CHANGED
@@ -1,4 +1,9 @@
1
  {
2
  "architectures": [
3
  "IsaacForConditionalGeneration"
4
  ],
@@ -55,26 +60,98 @@
55
  "num_key_value_heads": 8,
56
  "pixel_shuffle_scale": 2,
57
  "rms_norm_eps": 1e-06,
58
- "rope_scaling": {
59
- "mrope_interleaved": true,
60
- "mrope_section": null,
61
- "rope_type": "default"
62
- },
63
  "rope_theta": 1000000.0,
64
  "sliding_window": null,
65
  "tie_word_embeddings": false,
66
- "transformers_version": "4.56.1",
67
  "use_cache": true,
68
  "use_sliding_window": false,
69
- "video_patch_size": 16,
70
  "vision_config": {
71
  "attention_dropout": 0.0,
72
  "hidden_act": "gelu_pytorch_tanh",
73
  "hidden_size": 1152,
74
  "image_size": 256,
75
  "intermediate_size": 4304,
76
  "layer_norm_eps": 1e-06,
77
- "model_type": "pixel_shuffle_siglip2",
78
  "num_attention_heads": 16,
79
  "num_channels": 3,
80
  "num_hidden_layers": 27,
@@ -83,7 +160,19 @@
83
  "pixel_shuffle_scale_factor": 2
84
  },
85
  "vision_max_num_patches": 6144,
86
  "vision_min_num_patches": 256,
87
  "vision_token": "<image>",
88
  "vocab_size": 151936
89
  }
 
1
  {
2
+ "_rope_scaling": {
3
+ "mrope_interleaved": true,
4
+ "mrope_section": null,
5
+ "rope_type": "default"
6
+ },
7
  "architectures": [
8
  "IsaacForConditionalGeneration"
9
  ],
 
60
  "num_key_value_heads": 8,
61
  "pixel_shuffle_scale": 2,
62
  "rms_norm_eps": 1e-06,
63
  "rope_theta": 1000000.0,
64
  "sliding_window": null,
65
+ "text_config": {
66
+ "_name_or_path": "/tmp/qwen3_temp_j1e9g12y/hf-checkpoint",
67
+ "architectures": [
68
+ "IsaacForConditionalGeneration"
69
+ ],
70
+ "attention_bias": false,
71
+ "attention_dropout": 0.0,
72
+ "bos_token_id": 151643,
73
+ "dtype": "float32",
74
+ "eos_token_id": 151645,
75
+ "head_dim": 128,
76
+ "hidden_act": "silu",
77
+ "hidden_size": 2048,
78
+ "initializer_range": 0.02,
79
+ "intermediate_size": 6144,
80
+ "layer_types": [
81
+ "full_attention",
82
+ "full_attention",
83
+ "full_attention",
84
+ "full_attention",
85
+ "full_attention",
86
+ "full_attention",
87
+ "full_attention",
88
+ "full_attention",
89
+ "full_attention",
90
+ "full_attention",
91
+ "full_attention",
92
+ "full_attention",
93
+ "full_attention",
94
+ "full_attention",
95
+ "full_attention",
96
+ "full_attention",
97
+ "full_attention",
98
+ "full_attention",
99
+ "full_attention",
100
+ "full_attention",
101
+ "full_attention",
102
+ "full_attention",
103
+ "full_attention",
104
+ "full_attention",
105
+ "full_attention",
106
+ "full_attention",
107
+ "full_attention",
108
+ "full_attention"
109
+ ],
110
+ "max_position_embeddings": 40960,
111
+ "max_window_layers": 28,
112
+ "model_type": "qwen3",
113
+ "num_attention_heads": 16,
114
+ "num_hidden_layers": 28,
115
+ "num_key_value_heads": 8,
116
+ "pixel_shuffle_scale": 2,
117
+ "rms_norm_eps": 1e-06,
118
+ "rope_scaling": {
119
+ "mrope_interleaved": true,
120
+ "mrope_section": null,
121
+ "rope_type": "default"
122
+ },
123
+ "rope_theta": 1000000.0,
124
+ "sliding_window": null,
125
+ "use_cache": true,
126
+ "use_sliding_window": false,
127
+ "vision_max_num_patches": 6144,
128
+ "vision_mean": [
129
+ 0.5,
130
+ 0.5,
131
+ 0.5
132
+ ],
133
+ "vision_min_num_patches": 256,
134
+ "vision_patch_size": 16,
135
+ "vision_std": [
136
+ 0.5,
137
+ 0.5,
138
+ 0.5
139
+ ],
140
+ "vocab_size": 151936
141
+ },
142
  "tie_word_embeddings": false,
143
+ "transformers_version": "4.57.1",
144
  "use_cache": true,
145
  "use_sliding_window": false,
146
  "vision_config": {
147
+ "_attn_implementation": "flash_attention_2",
148
  "attention_dropout": 0.0,
149
  "hidden_act": "gelu_pytorch_tanh",
150
  "hidden_size": 1152,
151
  "image_size": 256,
152
  "intermediate_size": 4304,
153
  "layer_norm_eps": 1e-06,
154
+ "model_type": "isaac_vision",
155
  "num_attention_heads": 16,
156
  "num_channels": 3,
157
  "num_hidden_layers": 27,
 
160
  "pixel_shuffle_scale_factor": 2
161
  },
162
  "vision_max_num_patches": 6144,
163
+ "vision_mean": [
164
+ 0.5,
165
+ 0.5,
166
+ 0.5
167
+ ],
168
  "vision_min_num_patches": 256,
169
+ "vision_patch_size": 16,
170
+ "vision_rescale_factor": 0.00392156862745098,
171
+ "vision_std": [
172
+ 0.5,
173
+ 0.5,
174
+ 0.5
175
+ ],
176
  "vision_token": "<image>",
177
  "vocab_size": 151936
178
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
  "_from_model_config": true,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
- "transformers_version": "4.56.1"
+ "transformers_version": "4.57.1"
  }
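Taken together, the config.json and generation_config.json changes above nest the language-model settings under a text_config block (the old top-level rope_scaling becomes _rope_scaling, with rope_scaling now inside text_config), tag the vision tower as isaac_vision, and bump transformers_version to 4.57.1. A minimal sketch to confirm the new layout with plain json (assuming config.json has been downloaded to the working directory):

```python
import json

# Load the restructured config.json from this commit (local copy assumed).
with open("config.json") as f:
    cfg = json.load(f)

# rope_scaling now lives under the nested text_config block.
print(cfg["text_config"]["rope_scaling"])  # {'mrope_interleaved': True, 'mrope_section': None, 'rope_type': 'default'}
print(cfg["text_config"]["model_type"])    # qwen3
print(cfg["vision_config"]["model_type"])  # isaac_vision
print(cfg["transformers_version"])         # 4.57.1
```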
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3d31217bf5365162ae38b4e6a5b27acff8481ef892e9803874cbb49476d0f501
- size 4969539560
+ oid sha256:6bb45f8ee129e542e12ea62717345ce39118f3f26971a082410b70f898aad3f3
+ size 4969541832
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e133442cabfd18ed5ba13cd21527d0220c78e2989a2778b8849e5835e0995c75
- size 4054187824
+ oid sha256:cf4c2c3f1ba00b71d2c21fdf777535dcf56655e0d0e9d9fbd037f9a5f3181a57
+ size 4054188968
model.safetensors.index.json CHANGED
@@ -5,316 +5,316 @@
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00003-of-00003.safetensors",
8
- "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
9
- "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
10
- "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
11
- "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
12
- "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
13
- "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
14
- "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
15
- "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
16
- "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
17
- "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
18
- "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
19
- "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
20
- "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
21
- "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
22
- "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
23
- "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
24
- "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
25
- "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
26
- "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
27
- "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
28
- "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
29
- "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
30
- "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
31
- "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
32
- "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
33
- "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
34
- "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
35
- "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
36
- "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
37
- "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
38
- "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
39
- "model.layers.10.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
40
- "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
41
- "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
42
- "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors",
43
- "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
44
- "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
45
- "model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
46
- "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
47
- "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
48
- "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
49
- "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
50
- "model.layers.11.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
51
- "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
52
- "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
53
- "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors",
54
- "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
55
- "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
56
- "model.layers.12.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
57
- "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
58
- "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
59
- "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
60
- "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
61
- "model.layers.12.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
62
- "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
63
- "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
64
- "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors",
65
- "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
66
- "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
67
- "model.layers.13.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
68
- "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
69
- "model.layers.13.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
70
- "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
71
- "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
72
- "model.layers.13.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
73
- "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
74
- "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
75
- "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors",
76
- "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
77
- "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
78
- "model.layers.14.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
79
- "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
80
- "model.layers.14.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
81
- "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
82
- "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
83
- "model.layers.14.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
84
- "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
85
- "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
86
- "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors",
87
- "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
88
- "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
89
- "model.layers.15.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
90
- "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
91
- "model.layers.15.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
92
- "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
93
- "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
94
- "model.layers.15.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
95
- "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
96
- "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
97
- "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors",
98
- "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
99
- "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
100
- "model.layers.16.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
101
- "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
102
- "model.layers.16.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
103
- "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
104
- "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
105
- "model.layers.16.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
106
- "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
107
- "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
108
- "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors",
109
- "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
110
- "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
111
- "model.layers.17.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
112
- "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
113
- "model.layers.17.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
114
- "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
115
- "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
116
- "model.layers.17.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
117
- "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
118
- "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
119
- "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
120
- "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
121
- "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
122
- "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
123
- "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
124
- "model.layers.18.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
125
- "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
126
- "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
127
- "model.layers.18.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
128
- "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
129
- "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
130
- "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
131
- "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
132
- "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
133
- "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
134
- "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
135
- "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
136
- "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
137
- "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
138
- "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
139
- "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
140
- "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
141
- "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
142
- "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
143
- "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
144
- "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
145
- "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
146
- "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
147
- "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
148
- "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
149
- "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
150
- "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
151
- "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
152
- "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
153
- "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
154
- "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
155
- "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
156
- "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
157
- "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
158
- "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
159
- "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
160
- "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
161
- "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
162
- "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
163
- "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
164
- "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
165
- "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
166
- "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
167
- "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
168
- "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
169
- "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
170
- "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
171
- "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
172
- "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
173
- "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
174
- "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
175
- "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
176
- "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
177
- "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
178
- "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
179
- "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
180
- "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
181
- "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
182
- "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
183
- "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
184
- "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
185
- "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
186
- "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
187
- "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
188
- "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
189
- "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
190
- "model.layers.23.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
191
- "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
192
- "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
193
- "model.layers.23.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
194
- "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
195
- "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
196
- "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors",
197
- "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
198
- "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
199
- "model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
200
- "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
201
- "model.layers.24.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
202
- "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
203
- "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
204
- "model.layers.24.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
205
- "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
206
- "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
207
- "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors",
208
- "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
209
- "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
210
- "model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
211
- "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
212
- "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
213
- "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
214
- "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
215
- "model.layers.25.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
216
- "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
217
- "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
218
- "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors",
219
- "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
220
- "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
221
- "model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
222
- "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
223
- "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
224
- "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
225
- "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
226
- "model.layers.26.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
227
- "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
228
- "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
229
- "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors",
230
- "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
231
- "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
232
- "model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
233
- "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
234
- "model.layers.27.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
235
- "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
236
- "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
237
- "model.layers.27.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
238
- "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
239
- "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
240
- "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
241
- "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
242
- "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
243
- "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
244
- "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
245
- "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
246
- "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
247
- "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
248
- "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
249
- "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
- "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
- "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
- "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
- "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
- "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
- "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
- "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
257
- "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
258
- "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
259
- "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
260
- "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
261
- "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
262
- "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
263
- "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
264
- "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
265
- "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
266
- "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
267
- "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
268
- "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
269
- "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
270
- "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
271
- "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
272
- "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
273
- "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
274
- "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
275
- "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
276
- "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
277
- "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
278
- "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
279
- "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
280
- "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
281
- "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
282
- "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
283
- "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
284
- "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
285
- "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
286
- "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
287
- "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
288
- "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
289
- "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
290
- "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
291
- "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
292
- "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
293
- "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
294
- "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
295
- "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
296
- "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
297
- "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
298
- "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
299
- "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
300
- "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
301
- "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
302
- "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
303
- "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
304
- "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
305
- "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
306
- "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
307
- "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
308
- "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
309
- "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
310
- "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
311
- "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
312
- "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
313
- "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
314
- "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
315
- "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
316
- "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
317
- "model.norm.weight": "model-00002-of-00003.safetensors",
318
  "model.vision_embedding.0.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors",
319
  "model.vision_embedding.0.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors",
320
  "model.vision_embedding.0.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors",
 
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00003-of-00003.safetensors",
8
+ "model.text_model.embed_tokens.weight": "model-00001-of-00003.safetensors",
9
+ "model.text_model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
10
+ "model.text_model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.text_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.text_model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
13
+ "model.text_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
14
+ "model.text_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
15
+ "model.text_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.text_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.text_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
18
+ "model.text_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.text_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.text_model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
21
+ "model.text_model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
22
+ "model.text_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.text_model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.text_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
25
+ "model.text_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
26
+ "model.text_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
27
+ "model.text_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
28
+ "model.text_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
29
+ "model.text_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.text_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
31
+ "model.text_model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
32
+ "model.text_model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.text_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.text_model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.text_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
36
+ "model.text_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
37
+ "model.text_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
38
+ "model.text_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
39
+ "model.text_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
40
+ "model.text_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
41
+ "model.text_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
42
+ "model.text_model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors",
43
+ "model.text_model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
44
+ "model.text_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
45
+ "model.text_model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
46
+ "model.text_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
47
+ "model.text_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
48
+ "model.text_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
49
+ "model.text_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
50
+ "model.text_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
51
+ "model.text_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
52
+ "model.text_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
53
+ "model.text_model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors",
54
+ "model.text_model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
55
+ "model.text_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
56
+ "model.text_model.layers.12.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
57
+ "model.text_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
58
+ "model.text_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
59
+ "model.text_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
60
+ "model.text_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
61
+ "model.text_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
62
+ "model.text_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
63
+ "model.text_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
64
+ "model.text_model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors",
65
+ "model.text_model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
66
+ "model.text_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
67
+ "model.text_model.layers.13.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
68
+ "model.text_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
69
+ "model.text_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
70
+ "model.text_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
71
+ "model.text_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
72
+ "model.text_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
73
+ "model.text_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
74
+ "model.text_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
75
+ "model.text_model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors",
76
+ "model.text_model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
77
+ "model.text_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
78
+ "model.text_model.layers.14.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
79
+ "model.text_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
80
+ "model.text_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
81
+ "model.text_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
82
+ "model.text_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
83
+ "model.text_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
84
+ "model.text_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
85
+ "model.text_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
86
+ "model.text_model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors",
87
+ "model.text_model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
88
+ "model.text_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
89
+ "model.text_model.layers.15.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
90
+ "model.text_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
91
+ "model.text_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
92
+ "model.text_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
93
+ "model.text_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
94
+ "model.text_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
95
+ "model.text_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
96
+ "model.text_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
97
+ "model.text_model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors",
98
+ "model.text_model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
99
+ "model.text_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
100
+ "model.text_model.layers.16.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
101
+ "model.text_model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
102
+ "model.text_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
103
+ "model.text_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
104
+ "model.text_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
105
+ "model.text_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
106
+ "model.text_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
107
+ "model.text_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
108
+ "model.text_model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors",
109
+ "model.text_model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
110
+ "model.text_model.layers.17.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
111
+ "model.text_model.layers.17.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
112
+ "model.text_model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
113
+ "model.text_model.layers.17.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
114
+ "model.text_model.layers.17.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
115
+ "model.text_model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
116
+ "model.text_model.layers.17.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
117
+ "model.text_model.layers.17.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.text_model.layers.17.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.text_model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
120
+ "model.text_model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
121
+ "model.text_model.layers.18.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.text_model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
123
+ "model.text_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
124
+ "model.text_model.layers.18.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
125
+ "model.text_model.layers.18.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
126
+ "model.text_model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
127
+ "model.text_model.layers.18.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
128
+ "model.text_model.layers.18.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
129
+ "model.text_model.layers.18.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
130
+ "model.text_model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
131
+ "model.text_model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.text_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.text_model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.text_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.text_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
136
+ "model.text_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.text_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.text_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
139
+ "model.text_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.text_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.text_model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
142
+ "model.text_model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
143
+ "model.text_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
144
+ "model.text_model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
145
+ "model.text_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
146
+ "model.text_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
147
+ "model.text_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
148
+ "model.text_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
149
+ "model.text_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
150
+ "model.text_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
151
+ "model.text_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
152
+ "model.text_model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
153
+ "model.text_model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
154
+ "model.text_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
155
+ "model.text_model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
156
+ "model.text_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
157
+ "model.text_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
158
+ "model.text_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
159
+ "model.text_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
160
+ "model.text_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
161
+ "model.text_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
162
+ "model.text_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
163
+ "model.text_model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
164
+ "model.text_model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
165
+ "model.text_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
166
+ "model.text_model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
167
+ "model.text_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
168
+ "model.text_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
169
+ "model.text_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
170
+ "model.text_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
171
+ "model.text_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
172
+ "model.text_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
173
+ "model.text_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
174
+ "model.text_model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
175
+ "model.text_model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
176
+ "model.text_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
177
+ "model.text_model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
178
+ "model.text_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
179
+ "model.text_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
180
+ "model.text_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
181
+ "model.text_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
182
+ "model.text_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
183
+ "model.text_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
184
+ "model.text_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
185
+ "model.text_model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
186
+ "model.text_model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
187
+ "model.text_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
188
+ "model.text_model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
189
+ "model.text_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
190
+ "model.text_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
191
+ "model.text_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
192
+ "model.text_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
193
+ "model.text_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
194
+ "model.text_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
195
+ "model.text_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
196
+ "model.text_model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors",
197
+ "model.text_model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
198
+ "model.text_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
199
+ "model.text_model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
200
+ "model.text_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
201
+ "model.text_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
202
+ "model.text_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
203
+ "model.text_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
204
+ "model.text_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
205
+ "model.text_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
206
+ "model.text_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
207
+ "model.text_model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors",
208
+ "model.text_model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
209
+ "model.text_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
210
+ "model.text_model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
211
+ "model.text_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
212
+ "model.text_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
213
+ "model.text_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
214
+ "model.text_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
215
+ "model.text_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
216
+ "model.text_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
217
+ "model.text_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
218
+ "model.text_model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors",
219
+ "model.text_model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
220
+ "model.text_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
221
+ "model.text_model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
222
+ "model.text_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
223
+ "model.text_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
224
+ "model.text_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
225
+ "model.text_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
226
+ "model.text_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
227
+ "model.text_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
228
+ "model.text_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
229
+ "model.text_model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors",
230
+ "model.text_model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
231
+ "model.text_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
232
+ "model.text_model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
233
+ "model.text_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
234
+ "model.text_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
235
+ "model.text_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
236
+ "model.text_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
237
+ "model.text_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
238
+ "model.text_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
239
+ "model.text_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
240
+ "model.text_model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
241
+ "model.text_model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
242
+ "model.text_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
243
+ "model.text_model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.text_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
245
+ "model.text_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
246
+ "model.text_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
247
+ "model.text_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.text_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
249
+ "model.text_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.text_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.text_model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.text_model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.text_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.text_model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.text_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.text_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
257
+ "model.text_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.text_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.text_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
260
+ "model.text_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
261
+ "model.text_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.text_model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
263
+ "model.text_model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.text_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
265
+ "model.text_model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.text_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
267
+ "model.text_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
268
+ "model.text_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.text_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
270
+ "model.text_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
271
+ "model.text_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.text_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.text_model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.text_model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.text_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.text_model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.text_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
278
+ "model.text_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
279
+ "model.text_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.text_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.text_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
282
+ "model.text_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
283
+ "model.text_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.text_model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
285
+ "model.text_model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.text_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.text_model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
288
+ "model.text_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
289
+ "model.text_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
290
+ "model.text_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.text_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
292
+ "model.text_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
293
+ "model.text_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.text_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.text_model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
296
+ "model.text_model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
297
+ "model.text_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
298
+ "model.text_model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
299
+ "model.text_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
300
+ "model.text_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
301
+ "model.text_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
302
+ "model.text_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
303
+ "model.text_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
304
+ "model.text_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
305
+ "model.text_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
306
+ "model.text_model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
307
+ "model.text_model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
308
+ "model.text_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
309
+ "model.text_model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
310
+ "model.text_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
311
+ "model.text_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
312
+ "model.text_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
313
+ "model.text_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
314
+ "model.text_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
315
+ "model.text_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
316
+ "model.text_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
317
+ "model.text_model.norm.weight": "model-00002-of-00003.safetensors",
318
  "model.vision_embedding.0.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors",
319
  "model.vision_embedding.0.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors",
320
  "model.vision_embedding.0.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors",
modular_isaac.py CHANGED
@@ -1,17 +1,100 @@
1
  from __future__ import annotations
2
 
 
 
 
3
  from collections import defaultdict
4
- from typing import Any, TypedDict
5
 
6
- import math
7
- import numpy as np
8
  import torch
9
  import torch.nn as nn
10
  import torch.nn.functional as F
11
- import PIL.Image
12
-
13
-
14
  from transformers import (
 
 
15
  AutoTokenizer,
16
  BatchFeature,
17
  Cache,
@@ -21,44 +104,86 @@ from transformers import (
21
  )
22
  from transformers.cache_utils import SlidingWindowCache, StaticCache
23
  from transformers.generation.utils import GenerationMixin
 
 
 
 
 
 
 
 
 
 
 
 
24
  from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
25
- from transformers.models.qwen3.modeling_qwen3 import Qwen3DecoderLayer, Qwen3Model
26
  from transformers.models.qwen2.tokenization_qwen2 import Qwen2Tokenizer
27
- from transformers.processing_utils import ProcessorMixin
28
- from transformers.tokenization_utils import TensorType
29
- from transformers.modeling_attn_mask_utils import AttentionMaskConverter
30
- import re
31
-
32
- from transformers.models.siglip2.modeling_siglip2 import (
33
- Siglip2MLP,
34
- )
35
  from transformers.models.siglip2.configuration_siglip2 import Siglip2VisionConfig
36
- from perceptron.tensorstream import (
37
- Event,
38
- Stream,
39
- TensorStream,
40
- TextType,
41
- VisionType,
42
- create_stream,
43
- group_streams,
44
- )
45
- from perceptron.tensorstream.ops import (
46
- compute_mrope_pos_tensor,
47
- modality_mask,
48
- reconstruct_tensor_stream_from_compact_dict,
49
- slice as ts_slice,
50
- tensor_stream_token_view,
51
  )
52
 
53
 
54
- class PixelShuffleSiglip2VisionConfig(Siglip2VisionConfig):
55
  """Vision configuration for Isaac with Pixel Shuffle support.
56
 
57
  Extends Siglip2VisionConfig with additional fields for pixel shuffle.
 
 
 
 
 
 
58
  """
59
 
60
- model_type = "pixel_shuffle_siglip2"
61
  base_config_key = "vision_config"
 
62
 
63
  def __init__(
64
  self,
@@ -72,13 +197,261 @@ class PixelShuffleSiglip2VisionConfig(Siglip2VisionConfig):
72
  self.pixel_shuffle_scale_factor = pixel_shuffle_scale_factor
73
  self.num_patches = num_patches
74
 
75
 
76
- def create_cumulative_seq_lengths(seq_sizes: torch.Tensor, device: torch.device) -> tuple[torch.Tensor, int]:
77
- """Create cumulative sequence lengths for variable-length attention."""
78
- cu_seqlens = torch.zeros(len(seq_sizes) + 1, dtype=torch.int32, device=device)
79
- cu_seqlens[1:] = seq_sizes.cumsum(0)
80
- max_seqlen = int(seq_sizes.max().item()) if len(seq_sizes) > 0 else 0
81
- return cu_seqlens, max_seqlen
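A quick worked example of the helper above (illustrative, standalone): packed sequences of sizes [3, 2, 4] produce the offsets consumed by variable-length attention kernels.

import torch

seq_sizes = torch.tensor([3, 2, 4])
cu_seqlens = torch.zeros(len(seq_sizes) + 1, dtype=torch.int32)
cu_seqlens[1:] = seq_sizes.cumsum(0)
max_seqlen = int(seq_sizes.max().item())
print(cu_seqlens.tolist(), max_seqlen)  # [0, 3, 5, 9] 4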
82
 
83
 
84
  def _max_from_cu(cu: torch.Tensor | None, fallback: int) -> int:
@@ -88,7 +461,53 @@ def _max_from_cu(cu: torch.Tensor | None, fallback: int) -> int:
88
  return int((cu[1:] - cu[:-1]).max().item())
89
 
90
 
91
  def flash_attention_document_mask_forward(
 
92
  q_lhd: torch.Tensor, # (L, H, D)
93
  k_lhd: torch.Tensor, # (L, H, D)
94
  v_lhd: torch.Tensor, # (L, H, D)
@@ -144,7 +563,8 @@ def sdpa_document_mask_forward(
144
  v_lhd: torch.Tensor, # (L, H, D)
145
  dropout: float,
146
  scaling: float | None,
147
- cu_seqlens: torch.Tensor | None,
 
148
  ) -> torch.Tensor:
149
  """SDPA with block-diagonal masking for variable-length sequences."""
150
  L, H, D = q_lhd.shape
@@ -155,108 +575,132 @@ def sdpa_document_mask_forward(
155
  V = v_lhd.permute(1, 0, 2).unsqueeze(0)
156
 
157
  # Build block-diagonal mask for variable-length sequences
158
- attn_mask = None
159
- if cu_seqlens is not None:
160
- seq_sizes = (cu_seqlens[1:] - cu_seqlens[:-1]).long()
161
- seg_ids = torch.repeat_interleave(torch.arange(len(seq_sizes), device=q_lhd.device), seq_sizes)
162
- block_mask = seg_ids[:, None] != seg_ids[None, :] # Cross-document attention blocked
163
- attn_mask = torch.where(block_mask, -torch.inf, 0.0).to(q_lhd.dtype).view(1, 1, L, L)
 
 
 
 
 
164
 
165
  Y = F.scaled_dot_product_attention(Q, K, V, attn_mask=attn_mask, dropout_p=dropout, scale=scaling)
166
  return Y.squeeze(0).permute(1, 0, 2) # Back to (L, H, D)
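A standalone sketch of the block-diagonal mask idea used by this SDPA fallback: tokens from different packed documents must not attend to each other, so cross-document positions receive -inf before softmax (toy shapes, not the module's actual call path).

import torch
import torch.nn.functional as F

cu_seqlens = torch.tensor([0, 3, 5])                # two packed docs of lengths 3 and 2
seq_sizes = (cu_seqlens[1:] - cu_seqlens[:-1]).long()
seg_ids = torch.repeat_interleave(torch.arange(len(seq_sizes)), seq_sizes)
block = seg_ids[:, None] != seg_ids[None, :]        # True where attention is forbidden
attn_mask = torch.where(block, -torch.inf, 0.0)     # additive mask, shape (L, L)

L, H, D = 5, 2, 4
q = k = v = torch.randn(1, H, L, D)
y = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask.view(1, 1, L, L))
print(y.shape)                                      # torch.Size([1, 2, 5, 4])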
167
 
168
 
169
- class Siglip2VariableSequenceEmbeddings(nn.Module):
170
- def __init__(self, config: PixelShuffleSiglip2VisionConfig):
171
- super().__init__()
172
- self.config = config
173
- self.embed_dim = config.hidden_size
174
- self.patch_size = config.patch_size
175
 
176
- self.patch_embedding = nn.Linear(
177
- in_features=config.num_channels * self.patch_size * self.patch_size,
178
- out_features=self.embed_dim,
179
- )
180
 
181
- self.num_patches = config.num_patches
182
- self.position_embedding_size = int(self.num_patches**0.5)
183
- self.position_embedding = nn.Embedding(self.num_patches, self.embed_dim)
 
184
 
185
- def positional_embeddings(
186
- self, packed_seq_patches: tuple[torch.Tensor, torch.Tensor, torch.Tensor]
187
- ) -> torch.Tensor:
188
- # Prepare positional embeddings grid: (1, embed_dim, h, w)
189
- positional_embeddings = (
190
- self.position_embedding.weight.reshape(self.position_embedding_size, self.position_embedding_size, -1)
191
- .permute(2, 0, 1)
192
- .unsqueeze(0)
193
- )
194
 
195
- _seq_patches, _seq_sizes, spatial_shapes = packed_seq_patches
196
- pos_embeds_list = []
197
- mode = "bilinear"
198
- align_corners = False
199
- antialias = True
200
- for spatial_shape in spatial_shapes:
201
- height, width = spatial_shape
202
- # Guard to ensure height and width are positive for torch.compile
203
- if height > 0 and width > 0:
204
- resized_pos_embed = F.interpolate(
205
- positional_embeddings,
206
- size=(height, width),
207
- mode=mode,
208
- align_corners=align_corners,
209
- antialias=antialias,
210
- )
211
- # Reshape from (1, embed_dim, height, width) to (height*width, embed_dim)
212
- resized_pos_embed = resized_pos_embed.reshape(self.embed_dim, height * width).transpose(0, 1)
213
- else:
214
- # Fallback - should never happen in practice
215
- resized_pos_embed = positional_embeddings.reshape(
216
- self.embed_dim, self.position_embedding_size * self.position_embedding_size
217
- ).transpose(0, 1)[: height * width]
218
- pos_embeds_list.append(resized_pos_embed)
219
 
220
- # Concatenate all positional embeddings along the sequence dimension
221
- pos_embeds = torch.cat(pos_embeds_list, dim=0)
222
- return pos_embeds
223
 
224
- def forward(self, packed_seq_patches: tuple[torch.Tensor, torch.Tensor, torch.Tensor]):
225
- seq_patches, _seq_sizes, _spatial_shapes = packed_seq_patches
 
226
 
227
- # Apply patch embeddings
228
- target_dtype = self.patch_embedding.weight.dtype
229
- patch_embeds = self.patch_embedding(seq_patches.to(dtype=target_dtype))
230
- pos_embeds = self.positional_embeddings(packed_seq_patches)
231
 
232
- # Add positional embeddings to patch embeddings
233
- embeddings = patch_embeds + pos_embeds
234
- return embeddings
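The embeddings above support arbitrary token grids by resampling a learned square position table to each image's (h, w) grid. A minimal sketch of that resampling step, with toy sizes (the real module keeps the table in an nn.Embedding):

import torch
import torch.nn.functional as F

embed_dim, side = 8, 16                              # 16 * 16 learned positions
pos_table = torch.randn(side * side, embed_dim)
pos_grid = pos_table.reshape(side, side, embed_dim).permute(2, 0, 1).unsqueeze(0)

h, w = 10, 24                                        # one image's token grid
resized = F.interpolate(pos_grid, size=(h, w), mode="bilinear",
                        align_corners=False, antialias=True)
pos_embeds = resized.reshape(embed_dim, h * w).transpose(0, 1)
print(pos_embeds.shape)                              # torch.Size([240, 8])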
 
 
 
 
235
 
 
236
 
237
- class Siglip2VariableLengthAttention(nn.Module):
238
- """Custom attention that supports variable-length sequences with flash attention."""
 
 
 
 
239
 
240
- def __init__(self, config):
241
- super().__init__()
242
- self.config = config
243
- self.embed_dim = config.hidden_size
244
- self.num_heads = config.num_attention_heads
245
- self.head_dim = self.embed_dim // self.num_heads
246
- if self.head_dim * self.num_heads != self.embed_dim:
247
- raise ValueError(
248
- f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:"
249
- f" {self.num_heads})."
250
- )
251
- self.scale = self.head_dim**-0.5
252
- self.dropout = config.attention_dropout
253
 
254
- self.k_proj = nn.Linear(self.embed_dim, self.embed_dim)
255
- self.v_proj = nn.Linear(self.embed_dim, self.embed_dim)
256
- self.q_proj = nn.Linear(self.embed_dim, self.embed_dim)
257
- self.out_proj = nn.Linear(self.embed_dim, self.embed_dim)
258
 
259
- def forward(self, hidden_states, cu_seqlens=None, max_seqlen=None):
260
  # Expect packed sequences with batch_size == 1
261
  batch_size, L, _ = hidden_states.shape
262
  if batch_size != 1:
@@ -272,102 +716,326 @@ class Siglip2VariableLengthAttention(nn.Module):
272
  k = self.k_proj(x).view(L, H, D)
273
  v = self.v_proj(x).view(L, H, D)
274
 
275
- attn_impl = getattr(self.config, "_attn_implementation", "flash_attention_3")
276
-
277
- if attn_impl in ("flash_attention_2", "flash_attention_3"):
278
- y_lhd, _ = flash_attention_document_mask_forward(
279
- q,
280
- k,
281
- v,
282
- attention_mask=None,
283
- dropout=p_drop,
284
- scaling=self.scale,
285
- cum_seq_q=cu_seqlens,
286
- cum_seq_k=cu_seqlens,
287
- max_seqlen=max_seqlen,
288
- is_causal=False,
289
- )
290
- else:
291
- y_lhd = sdpa_document_mask_forward(q, k, v, dropout=p_drop, scaling=self.scale, cu_seqlens=cu_seqlens)
292
 
293
  # Merge heads and project
294
  y = self.out_proj(y_lhd.reshape(L, self.embed_dim))
295
  return y.unsqueeze(0), None # (1, L, E)
296
 
297
 
298
- class IsaacSiglip2EncoderLayer(nn.Module):
299
- """Siglip2 encoder layer with variable-length attention."""
300
-
301
- def __init__(self, config: PixelShuffleSiglip2VisionConfig):
302
- super().__init__()
303
- self.embed_dim = config.hidden_size
304
- self.self_attn = Siglip2VariableLengthAttention(config)
305
 
306
- self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps)
307
- self.mlp = Siglip2MLP(config) # Use HF's Siglip2MLP
308
- self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps)
309
 
310
  def forward(
311
  self,
312
  hidden_states: torch.Tensor,
313
- cu_seqlens: torch.Tensor = None,
314
- max_seqlen: int = None,
315
- ) -> tuple[torch.FloatTensor]:
316
- residual = hidden_states
317
-
318
- hidden_states = self.layer_norm1(hidden_states)
 
 
 
 
 
319
 
320
- hidden_states, attn_weights = self.self_attn(
321
- hidden_states=hidden_states,
322
- cu_seqlens=cu_seqlens,
323
- max_seqlen=max_seqlen,
 
 
324
  )
325
 
326
- hidden_states = residual + hidden_states
 
 
 
 
327
 
328
- residual = hidden_states
329
- hidden_states = self.layer_norm2(hidden_states)
330
- hidden_states = self.mlp(hidden_states)
331
- hidden_states = residual + hidden_states
332
 
333
- return (hidden_states,)
 
334
 
 
 
 
335
 
336
- class IsaacEncoder(nn.Module):
337
- """Encoder using Isaac encoder layers with variable-length attention support."""
 
338
 
339
- def __init__(self, config: PixelShuffleSiglip2VisionConfig):
340
- super().__init__()
341
- self.config = config
342
- self.layers = nn.ModuleList([IsaacSiglip2EncoderLayer(config) for _ in range(config.num_hidden_layers)])
 
 
343
 
 
344
  def forward(
345
  self,
346
  inputs_embeds,
347
- cu_seqlens: torch.Tensor | None = None,
348
- max_seqlen: int | None = None,
349
- output_hidden_states: bool = False,
 
 
 
350
  ):
351
- all_hidden_states = () if output_hidden_states else None
352
 
353
- hidden_states = inputs_embeds
 
 
 
 
 
 
354
 
355
- for encoder_layer in self.layers:
356
- if output_hidden_states:
357
- all_hidden_states = all_hidden_states + (hidden_states,)
 
 
 
 
358
 
359
- layer_outputs = encoder_layer(
360
- hidden_states,
361
- cu_seqlens,
362
- max_seqlen,
363
- )
364
 
365
- hidden_states = layer_outputs[0]
366
 
367
- if output_hidden_states:
368
- all_hidden_states = all_hidden_states + (hidden_states,)
369
 
370
- return hidden_states, all_hidden_states, None
371
 
372
 
373
  def create_pixel_shuffle_index_map(
@@ -397,16 +1065,19 @@ def create_pixel_shuffle_index_map(
397
  if device is None:
398
  device = seq_sizes.device
399
 
400
- r = int(scale_factor)
401
- if r < 2:
402
  raise ValueError("`scale_factor` must be ≥ 2")
403
 
404
- # Safety: all spatial dims must be divisible by r
405
  # Cannot run under torch compile fullgraph mode hence
406
- if not torch.compiler.is_compiling():
407
- if not ((token_grids[:, 0] % r == 0).all() and (token_grids[:, 1] % r == 0).all()):
 
 
408
  raise AssertionError(
409
- f"Every (H,W) in `token_grids` must be divisible by scale_factor={r}, got {token_grids.tolist()}"
 
410
  )
411
 
412
  gather_chunks: list[torch.Tensor] = []
@@ -418,19 +1089,21 @@ def create_pixel_shuffle_index_map(
418
  grid = grid.view(h, w) # (H, W)
419
 
420
  # -------- identical ordering to your fixed-res routine --------
421
- # Step 1: split width into blocks of r
422
- grid = grid.view(h, w // r, r) # (H, W/r, r)
423
- # Step 2: now split height into blocks of r
424
- grid = grid.view(h // r, r, w // r, r) # (H/r, r, W/r, r)
425
- # Step 3: final permutation to (H/r, W/r, r, r)
426
- grid = grid.permute(0, 2, 1, 3).contiguous() # (H/r, W/r, r, r)
427
- # Step 4: each (r, r) block forms one output token
428
- gather_chunks.append(grid.reshape(-1, r * r)) # (H*W / r², r²)
 
 
429
 
430
  tok_offset += seq_len
431
 
432
  # Concatenate over all images in the packed batch
433
- gather_idx = torch.cat(gather_chunks, dim=0) # (Σ_i HᵢWᵢ/r², )
434
  return gather_idx
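A toy check of the r x r grouping this index map performs, for a single 4 x 4 token grid with scale_factor r = 2 (it mirrors the reshape/permute steps above):

import torch

h, w, r = 4, 4, 2
grid = torch.arange(h * w).view(h, w)
grid = grid.view(h, w // r, r).view(h // r, r, w // r, r)
gather_idx = grid.permute(0, 2, 1, 3).reshape(-1, r * r)
print(gather_idx.tolist())
# [[0, 1, 4, 5], [2, 3, 6, 7], [8, 9, 12, 13], [10, 11, 14, 15]]

Each row lists the four patch tokens that get merged into one pixel-shuffled token.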
435
 
436
 
@@ -469,7 +1142,7 @@ def pixel_shuffle_varlen(
469
  x_ = x # (seq, embed)
470
 
471
  embed_dim = x_.size(-1)
472
- r = int(scale_factor)
473
 
474
  # Calculate seq_sizes from token_grids
475
  seq_sizes = torch.prod(token_grids, dim=-1)
@@ -478,15 +1151,15 @@ def pixel_shuffle_varlen(
478
  gather_idx = create_pixel_shuffle_index_map(
479
  seq_sizes=seq_sizes,
480
  token_grids=token_grids,
481
- scale_factor=r,
482
  device=x_.device,
483
- ) # (new_seq, )
484
 
485
- # Gather → (new_seq, r², embed_dim)
486
  gathered = x_[gather_idx] # fancy indexing keeps gradient
487
 
488
- # Merge the group dimension into channels to finish the shuffle
489
- out = gathered.reshape(gathered.size(0), embed_dim * r * r)
490
 
491
  # Restore batch dimension if needed
492
  if keep_batch_dim:
@@ -494,12 +1167,12 @@ def pixel_shuffle_varlen(
494
  return out
495
 
496
 
497
- class Siglip2SequenceVisionTransformer(nn.Module):
498
- def __init__(self, config: PixelShuffleSiglip2VisionConfig):
499
  super().__init__()
500
  self.config = config
501
- self.embeddings = Siglip2VariableSequenceEmbeddings(config)
502
- self.encoder = IsaacEncoder(config)
503
  self.post_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
504
  self.pixel_shuffle_scale_factor = config.pixel_shuffle_scale_factor
505
 
@@ -508,20 +1181,24 @@ class Siglip2SequenceVisionTransformer(nn.Module):
508
  seq_sizes = torch.prod(token_grids, dim=-1)
509
 
510
  # Get embeddings from packed sequence
511
- hidden_states = self.embeddings((seq_patches, seq_sizes, token_grids))
512
 
513
  # Add a pseudo batch dimension for the encoder
514
  hidden_states = hidden_states.unsqueeze(0)
515
 
516
  # Generate cumulative sequence lengths for variable-length attention
517
- cu_seqlens, max_seqlen = create_cumulative_seq_lengths(seq_sizes, hidden_states.device)
 
 
518
 
519
  # Pass through encoder with variable-length attention parameters
520
- hidden_states, _, _ = self.encoder(
521
  inputs_embeds=hidden_states,
522
  cu_seqlens=cu_seqlens,
523
  max_seqlen=max_seqlen,
 
524
  )
 
525
 
526
  # Apply final layer normalization
527
  hidden_states = self.post_layernorm(hidden_states)
@@ -539,44 +1216,17 @@ class Siglip2SequenceVisionTransformer(nn.Module):
539
  return hidden_states
540
 
541
 
542
- # ============================================================================
543
- # Configuration
544
- # ============================================================================
545
-
546
- MAX_PIXELS = 60_000_000 # 60‑megapixel ceiling ≈ 8200 × 7300 px
547
-
548
- # Vision preprocessing constants
549
- VISION_MEAN = (0.5, 0.5, 0.5)
550
- VISION_STD = (0.5, 0.5, 0.5)
551
- VISION_SCALE = 1 / 255
552
-
553
-
554
- def _make_writeable(arr: np.ndarray) -> np.ndarray:
555
- """Return *arr* itself if it is already writeable, otherwise try to flip the
556
- write flag in-place and finally fall back to `arr.copy()`.
557
- This guarantees the buffer handed to `torch.from_numpy()` is always
558
- writeable, silencing the PyTorch warning about undefined behaviour.
559
- """
560
- if arr.flags.writeable:
561
- return arr
562
-
563
- # First, try the cheap path — in‑place flag toggle (works for mmap'd arrays
564
- # and some shared memory buffers):
565
- try:
566
- arr.setflags(write=True)
567
- return arr # success: no data copy
568
- except ValueError:
569
- # Buffer is inherently read‑only (e.g. backed by PyAV / PIL): make copy
570
- return arr.copy()
571
-
572
-
573
- def extract_image_pil(image: PIL.Image.Image) -> torch.Tensor | None:
574
- if image.width * image.height > MAX_PIXELS:
575
- raise ValueError(f"Image (w={image.width}, h={image.height}) > MAX=`{MAX_PIXELS}`")
576
- img = image if image.mode == "RGB" else image.convert("RGB")
577
- arr = np.asarray(img)
578
- arr = _make_writeable(arr)
579
- return torch.from_numpy(arr)
580
 
581
 
582
  def get_image_size_for_max_num_patches(
@@ -611,13 +1261,6 @@ def get_image_size_for_max_num_patches(
611
  and respect both the maximum and optional minimum patch-count constraints.
612
  """
613
 
614
- def get_scaled_image_size(scale, original_size, patch_size, pixel_shuffle_scale):
615
- scaled_size = scale * original_size
616
- divisor = patch_size * pixel_shuffle_scale
617
- scaled_size = math.ceil(scaled_size / divisor) * divisor
618
- scaled_size = max(divisor, scaled_size)
619
- return int(scaled_size)
620
-
621
  # Ensure divisibility
622
  divisor = patch_size * pixel_shuffle_scale
623
  adjusted_height = math.ceil(image_height / divisor) * divisor
@@ -663,37 +1306,6 @@ def get_image_size_for_max_num_patches(
663
  return target_height, target_width
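A worked example of the divisibility adjustment above (the max/min patch-count rescaling happens in code not shown in this hunk): with patch_size = 16 and pixel_shuffle_scale = 2 the divisor is 32, so a 500 x 300 image is rounded up to 512 x 320, i.e. a 32 x 20 patch grid.

import math

patch_size, pixel_shuffle_scale = 16, 2
divisor = patch_size * pixel_shuffle_scale           # 32
h, w = 500, 300
target_h = math.ceil(h / divisor) * divisor          # 512
target_w = math.ceil(w / divisor) * divisor          # 320
print(target_h, target_w, (target_h // patch_size) * (target_w // patch_size))  # 512 320 640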
664
 
665
 
666
- _MEAN_TENSOR = torch.tensor(VISION_MEAN, dtype=torch.float32).view(1, 1, 1, -1)
667
- _STD_TENSOR = torch.tensor(VISION_STD, dtype=torch.float32).view(1, 1, 1, -1)
668
-
669
-
670
- def prepare_image_tensor(
671
- image: torch.Tensor,
672
- scale: float = VISION_SCALE,
673
- ) -> torch.Tensor:
674
- r"""Standardize RGB images prior to patch extraction via rescaling and whitening.
675
-
676
- Args:
677
- image (`torch.Tensor`):
678
- Tensor with shape `(..., height, width, 3)` containing RGB values. The tensor is converted to floating
679
- point if needed.
680
- scale (`float`, *optional*, defaults to `VISION_SCALE`):
681
- Scalar multiplier applied before normalization.
682
- Returns:
683
- `torch.Tensor`: Normalized tensor with the same shape as the input and dtype `torch.float32`.
684
- """
685
- if not torch.is_floating_point(image):
686
- image = image.float()
687
- rescaled = image * scale
688
-
689
- # Use precomputed tensors and move to the correct device if needed
690
- mean_tensor = _MEAN_TENSOR.to(image.device)
691
- std_tensor = _STD_TENSOR.to(image.device)
692
-
693
- normalized = (rescaled - mean_tensor) / std_tensor
694
- return normalized
695
-
696
-
697
  def patchify_vision(image: torch.Tensor, patch_size: int) -> torch.Tensor:
698
  r"""Convert normalized images into flattened ViT-style patches.
699
 
@@ -719,184 +1331,90 @@ def patchify_vision(image: torch.Tensor, patch_size: int) -> torch.Tensor:
719
  return patches
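The exact patchify implementation is not fully visible in this hunk; the sketch below shows the standard ViT-style flattening the docstring describes, under the assumption that each non-overlapping patch_size x patch_size block becomes one flattened vector.

import torch

def patchify_sketch(image: torch.Tensor, patch_size: int) -> torch.Tensor:
    # (N, H, W, C) -> (N, H/p, W/p, p*p*C), assuming H and W are multiples of p
    n, h, w, c = image.shape
    p = patch_size
    x = image.reshape(n, h // p, p, w // p, p, c)
    x = x.permute(0, 1, 3, 2, 4, 5)
    return x.reshape(n, h // p, w // p, p * p * c)

print(patchify_sketch(torch.randn(1, 64, 48, 3), 16).shape)  # torch.Size([1, 4, 3, 768])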
720
 
721
 
722
- def process_vision_for_patches(
723
- images: torch.Tensor,
724
- patch_size: int,
725
- max_num_patches: int,
726
- min_num_patches: int | None = None,
727
- pixel_shuffle_scale: int = 1,
728
- ) -> tuple[torch.Tensor, list[int]]:
729
- r"""Resize, normalize, and patchify RGB images for the vision encoder.
730
-
731
- Args:
732
- images (`torch.Tensor`):
733
- Either `(height, width, channels)` for a single image or `(num_images, height, width, channels)` for a
734
- batch. Channels are expected to be RGB.
735
- patch_size (`int`):
736
- Edge length of square patches; implicitly controls resize grid granularity.
737
- max_num_patches (`int`):
738
- Maximum number of patches allowed after resizing.
739
- min_num_patches (`int`, *optional*):
740
- Minimum number of patches. If provided, the routine upsamples images as needed to satisfy the lower bound.
741
- pixel_shuffle_scale (`int`, *optional*, defaults to 1):
742
- pixel shuffle scale factor; influences the target grid that the function produces.
743
-
744
- Returns:
745
- `tuple[torch.Tensor, list[int]]`: A pair `(patches, dims_virtual)` where `patches` has shape
746
- `(num_images, target_h / patch_size, target_w / patch_size, channels * patch_size**2)` and `dims_virtual`
747
- encodes effective `(images, height, width)` dimensions after optional pixel shuffling.
748
- """
749
- # Add batch dim if single image
750
- if images.dim() == 3:
751
- images = images.unsqueeze(0)
752
-
753
- # Permute to channel first for resize
754
- images = images.permute(0, 3, 1, 2)
755
-
756
- # Get target dimensions
757
- _, _, orig_height, orig_width = images.shape
758
- target_height, target_width = get_image_size_for_max_num_patches(
759
- orig_height,
760
- orig_width,
761
- patch_size,
762
- max_num_patches,
763
- min_num_patches=min_num_patches,
764
- pixel_shuffle_scale=pixel_shuffle_scale,
765
- )
766
-
767
- # Resize
768
- images = F.interpolate(
769
- images,
770
- size=(target_height, target_width),
771
- mode="bilinear",
772
- align_corners=False,
773
- )
774
-
775
- # Back to channel last
776
- images = images.permute(0, 2, 3, 1)
777
-
778
- # Normalize
779
- images = prepare_image_tensor(images)
780
-
781
- # Patchify
782
- patches = patchify_vision(images, patch_size=patch_size)
783
-
784
- # Calculate dimensions for the patches
785
- n_images, h_patches, w_patches, _ = patches.shape
786
- dims_virtual = (
787
- [1, h_patches, w_patches]
788
- if pixel_shuffle_scale == 1
789
- else [1, h_patches // pixel_shuffle_scale, w_patches // pixel_shuffle_scale]
790
- )
791
-
792
- return patches, dims_virtual
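Illustrative shape arithmetic for the function above: a 512 x 384 RGB image with patch_size = 16 and pixel_shuffle_scale = 2 (sizes already divisible by the divisor 32, so no resize adjustment) gives a 32 x 24 patch grid, 768 values per patch, and dims_virtual = [1, 16, 12].

patch_size, pixel_shuffle_scale = 16, 2
h_patches, w_patches = 512 // patch_size, 384 // patch_size        # 32, 24
patch_values = 3 * patch_size * patch_size                         # 768
dims_virtual = [1, h_patches // pixel_shuffle_scale, w_patches // pixel_shuffle_scale]
print(h_patches, w_patches, patch_values, dims_virtual)            # 32 24 768 [1, 16, 12]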
793
-
794
-
795
- def precompute_inv_freq(theta: float, dim: int) -> torch.Tensor:
796
- """
797
- Returns shape (dim//2,).
798
- """
799
- inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
800
- return inv_freq # type: ignore[return-value]
801
-
802
-
803
- def precompute_cos_sin_3d(
804
- position_ids: torch.Tensor, # shape (3, B, T)
805
- inv_freq: torch.Tensor, # shape (dim//2,)
806
- mrope_half_section: list[int], # sum to dim//2
807
- ) -> tuple[torch.Tensor, torch.Tensor]:
808
- r"""Generate 3D rotary embeddings for multi-axis positions.
809
-
810
- Args:
811
- position_ids (`torch.Tensor`):
812
- Tensor of shape `(3, batch_size, seq_len)` containing positional indices for the x/y/t axes.
813
- inv_freq (`torch.Tensor`):
814
- Precomputed inverse frequency vector used to derive rotary phases.
815
- mrope_half_section (`list[int]`):
816
- Sizes the axis-specific frequency blocks.
817
-
818
- Returns:
819
- `tuple[torch.Tensor, torch.Tensor]`: Cosine and sine tensors, each of shape `(batch_size, seq_len, dim)`, ready
820
- to be passed into rotary attention layers.
821
- """
822
- B = position_ids.shape[1]
823
- T = position_ids.shape[2]
824
- dim_half = inv_freq.shape[0]
825
- device = position_ids.device
826
-
827
- # Initialize with full dimension (not half) to match LLaMA
828
- cos_3d = torch.zeros((B, T, dim_half * 2), dtype=torch.float32, device=device)
829
- sin_3d = torch.zeros((B, T, dim_half * 2), dtype=torch.float32, device=device)
830
-
831
- offset = 0
832
- for d in range(3):
833
- block_size = mrope_half_section[d]
834
- freq_slice = inv_freq[offset : offset + block_size] # shape => (block_size,)
835
- # shape => (B, T, block_size)
836
- phase = position_ids[d].unsqueeze(-1).float() * freq_slice
837
-
838
- cos_part = phase.cos()
839
- sin_part = phase.sin()
840
-
841
- # Duplicate values for both halves of the dimension
842
- cos_3d[:, :, offset : offset + block_size] = cos_part
843
- cos_3d[:, :, dim_half + offset : dim_half + offset + block_size] = cos_part
844
- sin_3d[:, :, offset : offset + block_size] = sin_part
845
- sin_3d[:, :, dim_half + offset : dim_half + offset + block_size] = sin_part
846
-
847
- offset += block_size
848
-
849
- return cos_3d, sin_3d
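A small dimension check for the 3D rotary layout above (illustrative sizes, using the same [dim//4, dim//8, dim//8] split that IsaacRotaryEmbedding defines later in this file): the three blocks must sum to dim // 2, and the resulting cos/sin tensors span the full head dimension.

import torch

head_dim = 64
inv_freq = 1.0 / (10000.0 ** (torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim))
mrope_half_section = [head_dim // 4, head_dim // 8, head_dim // 8]   # [16, 8, 8]
assert sum(mrope_half_section) == inv_freq.shape[0] == head_dim // 2
print(inv_freq.shape, mrope_half_section)  # torch.Size([32]) [16, 8, 8]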
850
-
851
-
852
- class RopeScaling(TypedDict, total=False):
853
- rope_type: str
854
- factor: float
855
- mrope_section: list[int]
856
- mrope_interleaved: bool
857
- low_freq_factor: float
858
- high_freq_factor: float
859
- original_max_position_embeddings: int
860
-
861
-
862
  class IsaacConfig(Qwen3Config):
863
  """Configuration class for Isaac multimodal model."""
864
 
865
  model_type = "isaac"
866
- sub_configs = {"vision_config": PixelShuffleSiglip2VisionConfig}
 
867
 
868
  def __init__(
869
  self,
870
- vision_config=None,
871
- vision_patch_size: int = 16,
872
- vision_max_num_patches: int = 256,
873
- vision_min_num_patches: int | None = None,
874
- pixel_shuffle_scale: int = 1,
875
  max_sequence_length: int = 16384,
876
  vision_token: str = "<image>",
877
- vision_attn_implementation: str | None = None,
878
  **kwargs,
879
  ):
880
- super().__init__(**kwargs)
 
 
 
 
 
 
 
 
 
881
 
882
- # Handle vision config - either dict or PixelShuffleSiglip2VisionConfig instance
 
 
 
 
 
 
 
 
 
883
  if isinstance(vision_config, dict):
884
  self.vision_config = self.sub_configs["vision_config"](**vision_config)
 
 
885
  elif vision_config is None:
886
  self.vision_config = self.sub_configs["vision_config"]()
887
- else:
888
- self.vision_config = vision_config
889
 
890
- # EventStreamProcessor parameters (for backward compatibility)
891
- self.video_patch_size = vision_patch_size
892
- self.vision_max_num_patches = vision_max_num_patches
893
- self.vision_min_num_patches = vision_min_num_patches
894
- self.pixel_shuffle_scale = pixel_shuffle_scale
895
 
896
  # Processing parameters
897
  self.max_sequence_length = max_sequence_length
898
  self.vision_token = vision_token
899
- self.vision_attn_implementation = vision_attn_implementation
900
 
901
 
902
  # ============================================================================
@@ -948,43 +1466,50 @@ def create_text_event(tokenizer: AutoTokenizer, text: str, time: float = 0.0) ->
948
 
949
 
950
  class IsaacProcessor(ProcessorMixin):
951
- attributes = ["tokenizer"]
 
952
  tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
 
953
 
954
  def __init__(
955
  self,
956
- tokenizer: Qwen2Tokenizer,
957
- config: IsaacConfig | dict,
958
- ):
959
- super().__init__(tokenizer)
960
- self.tokenizer = tokenizer
 
 
 
 
 
961
 
962
  if isinstance(config, dict):
963
  config = IsaacConfig(**config)
964
- self.config = config
965
 
966
- # Use vision token from config
967
- self.vision_token = config.vision_token
 
 
968
 
969
- # Processing parameters
970
- self.max_sequence_length = config.max_sequence_length
 
971
 
972
- # Vision processing parameters
973
- self.patch_size = config.video_patch_size
974
- self.max_num_patches = config.vision_max_num_patches
975
- self.min_num_patches = config.vision_min_num_patches
976
- self.pixel_shuffle_scale = config.pixel_shuffle_scale
977
 
978
- def apply_chat_template(
979
- self,
980
- messages: list[dict[str, Any]],
981
- tokenize: bool = False,
982
- add_generation_prompt: bool = False,
983
- **kwargs,
984
- ) -> Any:
985
- return self.tokenizer.apply_chat_template(
986
- messages, tokenize=tokenize, add_generation_prompt=add_generation_prompt, **kwargs
987
- )
 
988
 
989
  def build_event_stream_simple(
990
  self,
@@ -1002,60 +1527,32 @@ class IsaacProcessor(ProcessorMixin):
1002
  for current_time, part in enumerate(parts):
1003
  if part == self.vision_token:
1004
  # Replace vision token with image event
1005
- if image_idx < len(images):
1006
- # Create vision event from PIL image
1007
- image_tensor = extract_image_pil(images[image_idx])
1008
- if image_tensor is not None:
1009
- # Create a vision event with the image tensor
1010
- vision_event = Event(
1011
- data=image_tensor.unsqueeze(0), # HWC format from extract_image_pil
1012
- type=VisionType.image, # I-frame
1013
- time=(current_time, current_time),
1014
- )
1015
- events.append(vision_event)
1016
- image_idx += 1
1017
- elif part: # Non-empty text part
1018
- # tokens = self.text_processor.tokenize(part, add_special_tokens=False)
1019
- text_event = create_text_event(self.tokenizer, part, time=current_time)
1020
- events.append(text_event)
1021
 
1022
- # Process vision events if any
1023
- if any(event.type == VisionType.image for event in events):
1024
- # Separate text and vision events for processing
1025
- text_events = [event for event in events if event.type == TextType.text]
1026
- vision_events = [event for event in events if event.type == VisionType.image]
1027
-
1028
- # Process vision events using functional approach
1029
- processed_vision_events = []
1030
- for vision_event in vision_events:
1031
- # Process the vision data
1032
- patches, dims_virtual = process_vision_for_patches(
1033
- vision_event.data.squeeze(0), # Remove the extra dimension
1034
- patch_size=self.patch_size,
1035
- max_num_patches=self.max_num_patches,
1036
- min_num_patches=self.min_num_patches,
1037
- pixel_shuffle_scale=self.pixel_shuffle_scale,
1038
  )
1039
 
1040
- # Update event with processed data
1041
- vision_event.data = patches.unsqueeze(1) # Add back frame dimension
1042
- vision_event.dims_virtual = dims_virtual
1043
- vision_event.dims_real = (
1044
- dims_virtual
1045
- if self.pixel_shuffle_scale == 1
1046
- else [
1047
- dims_virtual[0],
1048
- dims_virtual[1] * self.pixel_shuffle_scale,
1049
- dims_virtual[2] * self.pixel_shuffle_scale,
1050
- ]
1051
  )
1052
- vision_event.idx_range = (0, math.prod(dims_virtual))
1053
-
1054
- # Flatten the patches
1055
- vision_event.data = vision_event.data.reshape(-1, vision_event.data.shape[-1])
1056
- processed_vision_events.append(vision_event)
1057
-
1058
- events = text_events + processed_vision_events
1059
 
1060
  # Create stream without scheduling (events already in order)
1061
  return create_stream(events, priority=[TextType.text, VisionType.image], schedule=True)
@@ -1155,68 +1652,112 @@ def compute_position_ids_input_ids(input_ids: torch.Tensor) -> torch.Tensor:
1155
 
1156
 
1157
  class IsaacRotaryEmbedding(nn.Module):
 
 
1158
  def __init__(self, config: IsaacConfig, device=None):
1159
  super().__init__()
1160
 
1161
- # Extract dimensions from config
1162
- self.hidden_size = config.hidden_size
1163
- self.num_attention_heads = config.num_attention_heads
1164
- self.head_dim = config.head_dim
 
 
1165
 
1166
- # Get rope_scaling config - use direct access when available
1167
- rope_scaling = getattr(config, "rope_scaling", None) or {}
1168
 
1169
- # Read RopeScaling parameters
1170
- self.rope_type = rope_scaling.get("rope_type", "default")
 
1171
 
1172
- self.mrope_section = [
1173
- self.head_dim // 4, # 2x more for temporal dim
1174
- self.head_dim // 8,
1175
- self.head_dim // 8,
1176
- ]
 
 
 
 
 
 
 
 
 
 
 
1177
 
1178
- rope_base = getattr(config, "rope_theta", 10000.0)
1179
- inv_freq = precompute_inv_freq(rope_base, self.head_dim)
1180
- self.register_buffer("inv_freq", inv_freq, persistent=False)
1181
 
1182
- def forward(self, position_ids: torch.Tensor, modality_tensor: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
1183
  with torch.no_grad():
1184
- # Ensure non-spatial tokens have 1D rotation equivalence
1185
- not_spatial = ~(modality_tensor == VisionType.image.value)
1186
- # shape is [N, 1]
1187
- data_1d = position_ids[not_spatial][..., 0].unsqueeze(-1)
1188
- # now broadcast it from [N, 1] -> [N, D] so it matches pos[not_spatial] exactly
1189
- data_1d = data_1d.expand(-1, position_ids.shape[-1]) # expand along the last dim
1190
- position_ids = position_ids.clone() # Clone to avoid warning about in-place operations on expanded tensors
1191
- position_ids[not_spatial] = data_1d
1192
- position_ids = position_ids.permute(2, 0, 1) # pos dim first -> (3, B, L)
1193
- cos, sin = precompute_cos_sin_3d(position_ids, self.inv_freq, self.mrope_section)
 
 
 
 
 
1194
 
1195
- return cos, sin
1196
 
 
 
1197
 
1198
- class IsaacModel(Qwen3Model):
1199
  def __init__(self, config: IsaacConfig):
1200
- super().__init__(config)
1201
- text_cfg = getattr(config, "get_text_config", lambda: config)()
1202
- self.layers = torch.nn.ModuleList(
1203
- [Qwen3DecoderLayer(text_cfg, layer_idx) for layer_idx in range(config.num_hidden_layers)]
1204
- )
 
 
 
 
1205
  self.rotary_emb = IsaacRotaryEmbedding(config, device=self.device)
1206
 
1207
- vision_cfg = config.vision_config
1208
- # Use vision_attn_implementation if specified, otherwise fall back to general attn_implementation
1209
- vision_cfg._attn_implementation = (
1210
- config.vision_attn_implementation
1211
- if config.vision_attn_implementation is not None
1212
- else config._attn_implementation
1213
- )
1214
- if vision_cfg is None:
1215
  raise ValueError("IsaacConfig should always have vision_config")
1216
 
1217
- hidden_dim = vision_cfg.hidden_size * (vision_cfg.pixel_shuffle_scale_factor**2)
1218
  self.vision_embedding = nn.Sequential(
1219
- Siglip2SequenceVisionTransformer(vision_cfg),
1220
  nn.Linear(
1221
  hidden_dim,
1222
  4 * hidden_dim,
@@ -1232,10 +1773,37 @@ class IsaacModel(Qwen3Model):
1232
  VisionType: self.embed_vision,
1233
  }
1234
 
1235
  def embed_text_tokens(self, token_ids: torch.Tensor) -> torch.Tensor:
1236
  """Embed text tokens, squeezing singleton dimensions."""
1237
  # Text events are shaped as (..., 1); squeeze the singleton index dim
1238
- h = self.embed_tokens(token_ids)
1239
  if h.dim() >= 2 and h.size(-2) == 1:
1240
  h = h[..., 0, :]
1241
  return h
@@ -1317,7 +1885,7 @@ class IsaacModel(Qwen3Model):
1317
  elif input_ids is not None and inputs_embeds is not None:
1318
  raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
1319
  elif input_ids is not None:
1320
- inputs_embeds = self.embed_tokens(input_ids)
1321
  # Create text modality tensor if not provided
1322
  if modality_tensor is None:
1323
  batch_size, seq_length = input_ids.shape
@@ -1335,7 +1903,11 @@ class IsaacModel(Qwen3Model):
1335
  position_ids = compute_position_ids_input_ids(input_ids)
1336
 
1337
  # Compute MRoPE position embeddings if we have custom rotary_emb
1338
- cos, sin = self.rotary_emb(position_ids, modality_tensor)
 
 
 
 
1339
  cos = cos.to(inputs_embeds.dtype)
1340
  sin = sin.to(inputs_embeds.dtype)
1341
 
@@ -1348,7 +1920,7 @@ class IsaacModel(Qwen3Model):
1348
  # Initialize hidden states
1349
  hidden_states = inputs_embeds
1350
 
1351
- for decoder_layer in self.layers:
1352
  layer_outputs = decoder_layer(
1353
  hidden_states,
1354
  attention_mask=attention_mask,
@@ -1363,7 +1935,7 @@ class IsaacModel(Qwen3Model):
1363
  hidden_states = layer_outputs[0] if isinstance(layer_outputs, tuple) else layer_outputs
1364
 
1365
  # Final layer norm
1366
- hidden_states = self.norm(hidden_states)
1367
 
1368
  return BaseModelOutputWithPast(
1369
  last_hidden_state=hidden_states,
@@ -1527,15 +2099,13 @@ class IsaacForConditionalGeneration(Qwen3ForCausalLM, GenerationMixin):
1527
  config_class = IsaacConfig
1528
 
1529
  def __init__(self, config: IsaacConfig):
1530
- Qwen3PreTrainedModel.__init__(self, config)
1531
  self.model = IsaacModel(config) # Use our custom model
1532
  self.vocab_size = config.vocab_size
1533
  self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
1534
  # Tracks rotary position offsets computed during a full forward pass so decode steps can reuse them.
1535
  self.rope_deltas = None
1536
 
1537
- self.config = config
1538
-
1539
  def get_rope_index(
1540
  self,
1541
  input_ids: torch.Tensor | None,
@@ -1691,9 +2261,30 @@ class IsaacForConditionalGeneration(Qwen3ForCausalLM, GenerationMixin):
1691
  return True
1692
 
1693
 
 
 
 
 
 
 
 
1694
  __all__ = [
1695
  "IsaacConfig",
1696
  "IsaacModel",
1697
  "IsaacForConditionalGeneration",
 
1698
  "IsaacProcessor",
1699
  ]
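A hedged usage sketch for checkpoints that ship this file as custom code (the repository id below is a placeholder, not taken from this commit): such models are typically loaded with trust_remote_code=True so that transformers imports modular_isaac.py from the repository.

from transformers import AutoModelForCausalLM, AutoProcessor

repo_id = "org/isaac-checkpoint"  # hypothetical identifier
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)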
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Perceptron, Inc. All rights reserved.
2
+ # Perceptron, Inc. Non-Production License (2024-01-01)
3
+
4
+
5
+ # ## 1. Scope and acceptance
6
+
7
+ # **1.1. Scope of the Agreement.**
8
+ # This Agreement applies to any use, modification, or Distribution of any Perceptron Model by You, regardless of the source You obtained a copy of such Perceptron Model.
9
+ #
10
+ # **1.2. Acceptance.** By accessing, using, modifying, Distributing a Perceptron Model, or by creating, using or distributing a Derivative of the Perceptron Model, You agree to be bound by this Agreement.
11
+ #
12
+ # **1.3. Acceptance on behalf of a third-party.** If You accept this Agreement on behalf of Your employer or another person or entity, You warrant and represent that You have the authority to act and accept this Agreement on their behalf. In such a case, the word “You” in this Agreement will refer to Your employer or such other person or entity.
13
+ #
14
+ # ## 2. License
15
+ # **2.1. Grant of rights.** Subject to Section 3 below, Perceptron, Inc. hereby grants You a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable, limited license to use, copy, modify, and Distribute under the conditions provided in Section 2.2 below, the Perceptron Model and any Derivatives made by or for Perceptron, Inc. and to create Derivatives of the Perceptron Model.
16
+ #
17
+ # **2.2. Distribution of Perceptron Model and Derivatives made by or for Perceptron, Inc.** Subject to Section 3 below, You may Distribute copies of the Perceptron Model and/or Derivatives made by or for Perceptron, Inc., under the following conditions:
18
+ # - You must make available a copy of this Agreement to third-party recipients of the Perceptron Models and/or Derivatives made by or for Perceptron, Inc. you Distribute, it being specified that any rights to use the Perceptron Models and/or Derivatives made by or for Perceptron, Inc. shall be directly granted by Perceptron, Inc. to said third-party recipients pursuant to the Perceptron, Inc. Non-Production License agreement executed between these parties;
19
+ # - You must retain in all copies of the Perceptron Models the following attribution notice within a “Notice” text file distributed as part of such copies: “Licensed by Perceptron, Inc. under the Perceptron, Inc. Non-Production License”.
20
+ #
21
+ # **2.3. Distribution of Derivatives made by or for You.** Subject to Section 3 below, You may Distribute any Derivatives made by or for You under additional or different terms and conditions, provided that:
22
+ # - In any event, the use and modification of Perceptron Model and/or Derivatives made by or for Perceptron, Inc. shall remain governed by the terms and conditions of this Agreement;
23
+ # - You include in any such Derivatives made by or for You prominent notices stating that You modified the concerned Perceptron Model; and
24
+ # - Any terms and conditions You impose on any third-party recipients relating to Derivatives made by or for You shall neither limit such third-party recipients’ use of the Perceptron Model or any Derivatives made by or for Perceptron, Inc. in accordance with the Perceptron, Inc. Non-Production License nor conflict with any of its terms and conditions.
25
+ #
26
+ # ## 3. Limitations
27
+ # **3.1. Misrepresentation.** You must not misrepresent or imply, through any means, that the Derivatives made by or for You and/or any modified version of the Perceptron Model You Distribute under your name and responsibility is an official product of Perceptron, Inc. or has been endorsed, approved or validated by Perceptron, Inc., unless You are authorized by Us to do so in writing.
28
+ #
29
+ # **3.2. Usage Limitation**
30
+ # - You shall only use the Perceptron Models and Derivatives (whether or not created by Perceptron, Inc.) for testing, research, Personal, or evaluation purposes in Non-Production Environments;
31
+ # - Subject to the foregoing, You shall not supply the Perceptron Models or Derivatives in the course of a commercial activity, whether in return for payment or free of charge, in any medium or form, including but not limited to through a hosted or managed service (e.g. SaaS, cloud instances, etc.), or behind a software layer.
32
+ #
33
+ # **3.3. Usage not permitted under this Agreement.** If You want to use a Perceptron Model or a Derivative for any purpose that is not expressly authorized under this Agreement, You must request a license from Perceptron, Inc., which Perceptron, Inc. may grant to You in Perceptron, Inc.’s sole discretion. Please contact Perceptron, Inc. at the following e-mail address if You want to discuss such a license: [email protected]
34
+ #
35
+ # ## 4. Intellectual Property
36
+ # **4.1. Trademarks.** No trademark licenses are granted under this Agreement, and in connection with the Perceptron Models, You may not use any name or mark owned by or associated with Perceptron, Inc. or any of its affiliates, except (i) as required for reasonable and customary use in describing and Distributing the Perceptron Models and Derivatives made by or for Perceptron, Inc. and (ii) for attribution purposes as required by this Agreement.
37
+ #
38
+ # **4.2. Outputs.** We claim no ownership rights in and to the Outputs. You are solely responsible for the Outputs You generate and their subsequent uses in accordance with this Agreement.
39
+ #
40
+ # **4.3. Derivatives.** By entering into this Agreement, You accept that any Derivatives that You may create or that may be created for You shall be subject to the restrictions set out in Section 3 of this Agreement.
41
+ #
42
+ # ## 5. Liability
43
+ # **5.1. Limitation of liability.** In no event, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall Perceptron, Inc. be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this Agreement or out of the use or inability to use the Perceptron Models and Derivatives (including but not limited to damages for loss of data, loss of goodwill, loss of expected profit or savings, work stoppage, computer failure or malfunction, or any damage caused by malware or security breaches), even if Perceptron, Inc. has been advised of the possibility of such damages.
44
+ #
45
+ # **5.2. Indemnification.** You agree to indemnify and hold harmless Perceptron, Inc. from and against any claims, damages, or losses arising out of or related to Your use or Distribution of the Perceptron Models and Derivatives.
46
+ #
47
+ # ## 6. Warranty
48
+ # **6.1. Disclaimer.** Unless required by applicable law or agreed to in writing, Perceptron, Inc. provides the Perceptron Models and Derivatives on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. Perceptron, Inc. does not represent nor warrant that the Perceptron Models and Derivatives will be error-free, meet Your or any third party’s requirements, be secure or will allow You or any third party to achieve any kind of result or generate any kind of content. You are solely responsible for determining the appropriateness of using or Distributing the Perceptron Models and Derivatives and assume any risks associated with Your exercise of rights under this Agreement.
49
+ #
50
+ # ## 7. Termination
51
+ # **7.1. Term.** This Agreement is effective as of the date of your acceptance of this Agreement or access to the concerned Perceptron Models or Derivatives and will continue until terminated in accordance with the following terms.
52
+ #
53
+ # **7.2. Termination.** Perceptron, Inc. may terminate this Agreement at any time if You are in breach of this Agreement. Upon termination of this Agreement, You must cease to use all Perceptron Models and Derivatives and shall permanently delete any copy thereof. Sections 5, 6, 7 and 8 shall survive the termination of this Agreement.
54
+ #
55
+ # **7.3. Litigation.** If You initiate any legal action or proceedings against Us or any other entity (including a cross-claim or counterclaim in a lawsuit), alleging that the Model or a Derivative, or any part thereof, infringes upon intellectual property or other rights owned or licensable by You, then any licenses granted to You under this Agreement will immediately terminate as of the date such legal action or claim is filed or initiated.
56
+ #
57
+ # ## 8. General provisions
58
+ # **8.1. Governing Law.** This Agreement will be governed by and construed in accordance with the laws of the State of Washington, without regard to its conflict of law principles.
59
+ #
60
+ # **8.2. Jurisdiction.** The state and federal courts located in King County, Washington shall have exclusive jurisdiction over any dispute arising out of or relating to this Agreement, and You and We consent to personal jurisdiction and venue in such courts.
61
+ #
62
+ # **8.3. Severability.** If any provision of this Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein.
63
+ #
64
+ # ## 9. Definitions
65
+ # **“Agreement”**: means this Perceptron, Inc. Non-Production License agreement governing the access, use, and Distribution of the Perceptron Models and Derivatives.
66
+ #
67
+ # **“Derivative”**: means any (i) modified version of the Perceptron Model (including but not limited to any customized or fine-tuned version thereof), (ii) work based on the Perceptron Model, or (iii) any other derivative work thereof. For the avoidance of doubt, Outputs are not considered as Derivatives under this Agreement.
68
+ #
69
+ # **“Distribution”**, **“Distributing”**, **“Distribute”** or **“Distributed”**: means providing or making available, by any means, a copy of the Perceptron Models and/or the Derivatives as the case may be, subject to Section 3 of this Agreement.
70
+ #
71
+ # **“Perceptron, Inc.”**, **“We”** or **“Us”**: means Perceptron, Inc., a Delaware corporation with its principal place of business at 10900 NE 8th St Suite 613, Bellevue, WA 98004.
72
+ #
73
+ # **“Perceptron Model”**: means the foundational large language model(s), and its elements which include algorithms, software, instructed checkpoints, parameters, source code (inference code, evaluation code and, if applicable, fine-tuning code) and any other elements associated thereto made available by Perceptron, Inc. under this Agreement, including, if any, the technical documentation, manuals and instructions for the use and operation thereof.
74
+ #
75
+ # **“Non-Production Environment”**: means any setting, use case, or application of the Perceptron Models or Derivatives that expressly excludes live, real-world conditions, commercial operations, revenue-generating activities, or direct interactions with or impacts on end users (such as, for instance, Your employees or customers). Non-Production Environment may include, but is not limited to, any setting, use case, or application for research, development, testing, quality assurance, training, internal evaluation (other than any internal usage by employees in the context of the company’s business activities), and demonstration purposes.
76
+ #
77
+ # **“Outputs”**: means any content generated by the operation of the Perceptron Models or the Derivatives from a prompt (i.e., text instructions) provided by users. For the avoidance of doubt, Outputs do not include any components of a Perceptron Model, such as any fine-tuned versions of the Perceptron Models, the weights, or parameters.
78
+ #
79
+ # **“Personal”**: means any use of a Perceptron Model or a Derivative that is (i) solely for personal, non-profit and non-commercial purposes and (ii) not directly or indirectly connected to any commercial activities, business operations, or employment responsibilities. For illustration purposes, Personal use of a Model or a Derivative does not include any usage by individuals employed in companies in the context of their daily tasks, any activity that is intended to generate revenue, or that is performed on behalf of a commercial entity.
80
+ #
81
+ # **“You”**: means the individual or entity entering into this Agreement with Perceptron, Inc.
82
+
83
  from __future__ import annotations
84
 
85
+ import copy
86
+ import math
87
+ import re
88
  from collections import defaultdict
89
+ from typing import Any, Callable, Optional, Sequence, Union
90
 
91
+ import PIL.Image
 
92
  import torch
93
  import torch.nn as nn
94
  import torch.nn.functional as F
 
 
 
95
  from transformers import (
96
+ AutoImageProcessor,
97
+ AutoModel,
98
  AutoTokenizer,
99
  BatchFeature,
100
  Cache,
 
104
  )
105
  from transformers.cache_utils import SlidingWindowCache, StaticCache
106
  from transformers.generation.utils import GenerationMixin
107
+ from transformers.image_processing_utils_fast import (
108
+ BaseImageProcessorFast,
109
+ SizeDict,
110
+ group_images_by_shape,
111
+ reorder_images,
112
+ DefaultFastImageProcessorKwargs,
113
+ )
114
+ from transformers.image_utils import (
115
+ ChannelDimension,
116
+ PILImageResampling,
117
+ )
118
+ from transformers.modeling_attn_mask_utils import AttentionMaskConverter
119
  from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
120
+ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
121
  from transformers.models.qwen2.tokenization_qwen2 import Qwen2Tokenizer
122
+ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLRotaryEmbedding
 
 
 
 
 
 
 
123
  from transformers.models.siglip2.configuration_siglip2 import Siglip2VisionConfig
124
+ from transformers.models.siglip2.modeling_siglip2 import (
125
+ Siglip2Attention,
126
+ Siglip2Encoder as HFSiglip2Encoder,
127
+ Siglip2EncoderLayer as HFSiglip2EncoderLayer,
128
+ Siglip2VisionEmbeddings as HFSiglip2VisionEmbeddings,
 
 
 
 
 
 
 
 
 
 
129
  )
130
+ from transformers.processing_utils import ProcessorMixin, ProcessingKwargs, Unpack
131
+ from transformers.tokenization_utils import TensorType
132
+ from transformers.utils import auto_docstring
133
+ from transformers.utils.generic import can_return_tuple
134
+
135
+ # Vision preprocessing constants
136
+ from transformers.utils.constants import IMAGENET_STANDARD_MEAN as VISION_MEAN
137
+ from transformers.utils.constants import IMAGENET_STANDARD_STD as VISION_STD
138
+ from transformers.utils.import_utils import is_torchdynamo_compiling
139
+
140
+ try:
141
+ from genesis.public.tensorstream.tensor_stream import (
142
+ Event,
143
+ Stream,
144
+ TensorStream,
145
+ TextType,
146
+ VisionType,
147
+ create_stream,
148
+ group_streams,
149
+ )
150
+ from genesis.public.tensorstream.tensor_stream_utils import (
151
+ compute_mrope_pos_tensor,
152
+ modality_mask,
153
+ reconstruct_tensor_stream_from_compact_dict,
154
+ tensor_stream_token_view,
155
+ )
156
+ from genesis.public.tensorstream.tensor_stream_utils import (
157
+ slice as ts_slice,
158
+ )
159
+ except ModuleNotFoundError as exc: # pragma: no cover - import guard
160
+ raise ModuleNotFoundError(
161
+ "genesis.public.tensorstream is required for the Isaac HuggingFace integration. "
162
+ "Ensure the TensorStream package is installed and on PYTHONPATH."
163
+ ) from exc
164
+
165
+
166
+ _ORIGINAL_ATTENTION_FUNCTIONS: dict[str, Callable[..., tuple[torch.Tensor, Optional[torch.Tensor]]]] = {}
167
+ for _attn_name in ("flash_attention_2", "sdpa", "eager"):
168
+ if _attn_name in ALL_ATTENTION_FUNCTIONS:
169
+ _ORIGINAL_ATTENTION_FUNCTIONS[_attn_name] = ALL_ATTENTION_FUNCTIONS[_attn_name]
170
 
171
 
172
+ class IsaacVisionConfig(Siglip2VisionConfig):
173
  """Vision configuration for Isaac with Pixel Shuffle support.
174
 
175
  Extends Siglip2VisionConfig with additional fields for pixel shuffle.
176
+
177
+ Args:
178
+ pixel_shuffle_scale_factor (`int`, *optional*, defaults to 1):
179
+ Spatial factor applied before pixel shuffle reduces the resolution.
180
+ num_patches (`int`, *optional*, defaults to 256):
181
+ Maximum number of learnable positional embeddings to initialize.
182
  """
183
 
184
+ model_type = "isaac_vision"
185
  base_config_key = "vision_config"
186
+ _attn_implementation: str | None = None
187
 
188
  def __init__(
189
  self,
 
197
  self.pixel_shuffle_scale_factor = pixel_shuffle_scale_factor
198
  self.num_patches = num_patches
199
 
200
+ if self._attn_implementation is None:
201
+ self._attn_implementation = "flash_attention_2"
202
+
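+ # Usage sketch (illustrative, assuming the keyword arguments documented above): a 2x pixel shuffle
+ # merges each 2x2 block of vision tokens into a single token after the encoder.
+ # >>> cfg = IsaacVisionConfig(pixel_shuffle_scale_factor=2)
+ # >>> cfg.pixel_shuffle_scale_factor
+ # 2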
203
+
204
+ class IsaacImageProcessorKwargs(DefaultFastImageProcessorKwargs, total=False):
205
+ patch_size: int | None
206
+ max_num_patches: int | None
207
+ min_num_patches: int | None
208
+ pixel_shuffle_scale: int | None
209
+
210
+
211
+ class IsaacProcessorKwargs(ProcessingKwargs, total=False):
212
+ images_kwargs: IsaacImageProcessorKwargs
213
+
214
+
215
+ # Ensure python<=3.9 compatibility with TypedDict overrides.
216
+ IsaacProcessorKwargs.__annotations__["images_kwargs"] = IsaacImageProcessorKwargs
217
+
218
+
219
+ @auto_docstring
220
+ class IsaacImageProcessorFast(BaseImageProcessorFast):
221
+ r"""Fast torch-based image processor for Isaac vision inputs."""
222
+ MAX_PIXELS = 60_000_000 # 60‑megapixel ceiling ≈ 8200 × 7300 px
223
+
224
+ resample = PILImageResampling.BILINEAR
225
+ model_input_names = ["patches", "token_grids"]
226
+ valid_kwargs = IsaacImageProcessorKwargs
227
+ unused_kwargs = ["size", "do_center_crop", "crop_size"]
228
+
229
+ do_resize = True
230
+ size: SizeDict | None = None
231
+ default_to_square: bool | None = None
232
+ do_center_crop = False
233
+ crop_size: SizeDict | None = None
234
+ patch_size: int | None = 16
235
+ max_num_patches: int | None = 256
236
+ min_num_patches: int | None = None
237
+ pixel_shuffle_scale: int | None = 1
238
+ do_pad = False
239
+ pad_size: SizeDict | None = None
240
+ do_rescale = True
241
+ rescale_factor = 1 / 255
242
+ do_normalize = True
243
+ image_mean = list(VISION_MEAN)
244
+ image_std = list(VISION_STD)
245
+ do_convert_rgb = True
246
+ return_tensors = None
247
+ data_format = ChannelDimension.FIRST
248
+ input_data_format = None
249
+ device = None
250
+ disable_grouping = False
251
+ size_divisor: int | None = None
252
+
253
+ def __init__(
254
+ self,
255
+ **kwargs: Unpack[IsaacImageProcessorKwargs],
256
+ ) -> None:
257
+ super().__init__(**kwargs)
258
+
259
+ pixel_shuffle_scale = 1 if self.pixel_shuffle_scale is None else int(self.pixel_shuffle_scale)
260
+ if pixel_shuffle_scale < 1:
261
+ raise ValueError("`pixel_shuffle_scale` must be >= 1")
262
+ self.pixel_shuffle_scale = pixel_shuffle_scale
263
+
264
+
265
+ def _validate_preprocess_kwargs(self, **kwargs):
266
+ # Allow callers to omit resize-related placeholders that BaseImageProcessorFast checks for.
267
+ kwargs.pop("do_resize", None)
268
+ kwargs.pop("size", None)
269
+ kwargs.pop("do_center_crop", None)
270
+ kwargs.pop("crop_size", None)
271
+ kwargs.pop("disable_grouping", None)
272
+ return super()._validate_preprocess_kwargs(**kwargs)
273
+
274
+ def resize(
275
+ self,
276
+ image: "torch.Tensor",
277
+ size: SizeDict,
278
+ interpolation: Optional[Any] = None,
279
+ antialias: bool = True,
280
+ **kwargs,
281
+ ) -> torch.Tensor:
282
+ if size.height is None or size.width is None:
283
+ raise ValueError("IsaacImageProcessorFast requires explicit `height` and `width` when resizing.")
284
+
285
+ resize_mode: Any = interpolation
286
+ if hasattr(resize_mode, "value"):
287
+ resize_mode = resize_mode.value
288
+ elif hasattr(resize_mode, "name"):
289
+ resize_mode = resize_mode.name.lower()
290
+ elif resize_mode is None:
291
+ resize_mode = "bilinear"
292
+
293
+ if isinstance(resize_mode, str):
294
+ mode_key = resize_mode.lower()
295
+ else:
296
+ mode_key = resize_mode
297
+
298
+ resize_kwargs: dict[str, Any] = {}
299
+ if mode_key in {"linear", "bilinear", "bicubic", "trilinear"}:
300
+ resize_kwargs["align_corners"] = False
301
+
302
+ return F.interpolate(
303
+ image,
304
+ size=(size.height, size.width),
305
+ mode=resize_mode,
306
+ **resize_kwargs,
307
+ )
308
+
309
+ def _preprocess(
310
+ self,
311
+ images: list["torch.Tensor"],
312
+ do_resize: bool,
313
+ size: Optional[SizeDict],
314
+ interpolation: Optional[Any],
315
+ do_center_crop: bool,
316
+ crop_size: Optional[SizeDict],
317
+ do_rescale: Optional[bool],
318
+ rescale_factor: Optional[float],
319
+ do_normalize: Optional[bool],
320
+ image_mean: Optional[Union[float, Sequence[float]]],
321
+ image_std: Optional[Union[float, Sequence[float]]],
322
+ disable_grouping: Optional[bool] = None,
323
+ return_tensors: Optional[Union[str, TensorType]] = None,
324
+ do_pad: Optional[bool] = None,
325
+ pad_size: Optional[SizeDict] = None,
326
+ *,
327
+ patch_size: int | None = None,
328
+ max_num_patches: int | None = None,
329
+ min_num_patches: int | None = None,
330
+ pixel_shuffle_scale: int | None = None,
331
+ **kwargs,
332
+ ) -> BatchFeature:
333
+ if do_center_crop:
334
+ raise ValueError("`do_center_crop` is not supported by IsaacImageProcessorFast.")
335
+ if do_pad:
336
+ raise ValueError("`do_pad` is not supported by IsaacImageProcessorFast.")
337
+
338
+
339
+ grouped_images, grouped_images_index = group_images_by_shape(images, disable_grouping=disable_grouping)
340
+ processed_patches_grouped: dict[tuple[int, ...], torch.Tensor] = {}
341
+ token_grids_grouped: dict[tuple[int, ...], torch.Tensor] = {}
342
+ virtual_dims_grouped: dict[tuple[int, ...], torch.Tensor] = {}
343
+ real_dims_grouped: dict[tuple[int, ...], torch.Tensor] = {}
344
+
345
+ for shape, stacked_images in grouped_images.items():
346
+ if stacked_images.ndim != 4:
347
+ raise ValueError("Expected batched channel-first image tensors.")
348
+
349
+ batch_size, channels, original_height, original_width = stacked_images.shape
350
+
351
+ if bool(self.do_convert_rgb) and channels == 1:
352
+ stacked_images = stacked_images.repeat(1, 3, 1, 1)
353
+ channels = 3
354
+
355
+ if original_height * original_width > self.MAX_PIXELS:
356
+ raise ValueError(
357
+ f"Image (w={original_width}, h={original_height}) > MAX=`{self.MAX_PIXELS}`"
358
+ )
359
+
360
+ target_height, target_width = get_image_size_for_max_num_patches(
361
+ original_height,
362
+ original_width,
363
+ patch_size,
364
+ max_num_patches,
365
+ min_num_patches=min_num_patches,
366
+ pixel_shuffle_scale=pixel_shuffle_scale,
367
+ )
368
+
369
+ if do_resize:
370
+ resize_size = SizeDict(height=target_height, width=target_width)
371
+ image_batch = self.resize(
372
+ image=stacked_images,
373
+ size=resize_size,
374
+ interpolation=interpolation,
375
+ )
376
+ else:
377
+ if ((original_height % patch_size) != 0) or ((original_width % patch_size) != 0):
378
+ raise ValueError(
379
+ "Image dimensions must be divisible by patch_size when resize is disabled."
380
+ )
381
+ image_batch = stacked_images
382
+ target_height, target_width = original_height, original_width
383
+
384
+ if do_rescale:
385
+ image_batch = self.rescale_and_normalize(
386
+ image_batch,
387
+ do_rescale=do_rescale,
388
+ rescale_factor=rescale_factor,
389
+ do_normalize=do_normalize,
390
+ image_mean=image_mean,
391
+ image_std=image_std,
392
+ )
393
+
394
+ nhwc_images = image_batch.permute(0, 2, 3, 1)
395
+ nhwc_images = _compute_residual_p_frames(nhwc_images, is_p_frame=[False] * batch_size)
396
+
397
+ patches = patchify_vision(nhwc_images, patch_size=patch_size)
398
+ _, height_tokens, width_tokens, _ = patches.shape
399
+
400
+ token_grid = torch.tensor(
401
+ [height_tokens, width_tokens],
402
+ dtype=torch.long,
403
+ device=patches.device,
404
+ ).unsqueeze(0).repeat(batch_size, 1)
405
+
406
+ real_dim = torch.tensor(
407
+ [1, height_tokens, width_tokens],
408
+ dtype=torch.long,
409
+ device=patches.device,
410
+ ).unsqueeze(0).repeat(batch_size, 1)
411
+
412
+ if pixel_shuffle_scale > 1:
413
+ if (height_tokens % pixel_shuffle_scale) or (width_tokens % pixel_shuffle_scale):
414
+ raise ValueError(
415
+ "Spatial dimensions must be divisible by pixel_shuffle_scale when pixel shuffle is enabled."
416
+ )
417
+ virtual_height = height_tokens // pixel_shuffle_scale
418
+ virtual_width = width_tokens // pixel_shuffle_scale
419
+ else:
420
+ virtual_height = height_tokens
421
+ virtual_width = width_tokens
422
+
423
+ virtual_dim = torch.tensor(
424
+ [1, virtual_height, virtual_width],
425
+ dtype=torch.long,
426
+ device=patches.device,
427
+ ).unsqueeze(0).repeat(batch_size, 1)
428
+
429
+ processed_patches_grouped[shape] = patches
430
+ token_grids_grouped[shape] = token_grid
431
+ virtual_dims_grouped[shape] = virtual_dim
432
+ real_dims_grouped[shape] = real_dim
433
+
434
+ patches_slices = reorder_images(processed_patches_grouped, grouped_images_index)
435
+ token_grid_slices = reorder_images(token_grids_grouped, grouped_images_index)
436
+ virtual_dim_slices = reorder_images(virtual_dims_grouped, grouped_images_index)
437
+ real_dim_slices = reorder_images(real_dims_grouped, grouped_images_index)
438
+
439
+ patches_tensor = torch.stack(patches_slices, dim=0)
440
+ token_grids_tensor = torch.stack(token_grid_slices, dim=0)
441
+ virtual_dims_tensor = torch.stack(virtual_dim_slices, dim=0)
442
+ real_dims_tensor = torch.stack(real_dim_slices, dim=0)
443
+
444
+ return BatchFeature(
445
+ data={
446
+ "patches": patches_tensor,
447
+ "token_grids": token_grids_tensor,
448
+ "virtual_pixel_size": virtual_dims_tensor,
449
+ "real_pixel_size": real_dims_tensor,
450
+ },
451
+ tensor_type=return_tensors,
452
+ )
453
+
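+ # Shape sketch (illustrative, assuming the class defaults patch_size=16 and max_num_patches=256):
+ # a single 256x256 RGB image resolves to a 16x16 token grid, so the returned features are roughly
+ # patches:            (1, 16, 16, 768)   # 768 = 3 * 16 * 16 flattened pixel values per patch
+ # token_grids:        (1, 2)             # [[16, 16]]
+ # virtual_pixel_size: (1, 3)             # [[1, 16, 16]] (each axis halved if pixel_shuffle_scale=2)
+ # real_pixel_size:    (1, 3)             # [[1, 16, 16]]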
454
 
 
 
 
 
 
 
455
 
456
 
457
  def _max_from_cu(cu: torch.Tensor | None, fallback: int) -> int:
 
461
  return int((cu[1:] - cu[:-1]).max().item())
462
 
463
 
464
+ def build_document_attention_mask(
465
+ cu_seqlens: torch.Tensor | None,
466
+ total_tokens: int,
467
+ dtype: torch.dtype,
468
+ device: torch.device,
469
+ ) -> torch.Tensor | None:
470
+ """Creates an additive attention mask that blocks cross-document attention."""
471
+
472
+ if cu_seqlens is None:
473
+ return None
474
+
475
+ if cu_seqlens.numel() < 2:
476
+ return None
477
+
478
+ seq_sizes = (cu_seqlens[1:] - cu_seqlens[:-1]).long()
479
+ if seq_sizes.numel() == 0:
480
+ return None
481
+
482
+ seg_ids = torch.repeat_interleave(torch.arange(seq_sizes.numel(), device=device), seq_sizes)
483
+ block_mask = seg_ids[:, None] != seg_ids[None, :]
484
+ additive_mask = torch.zeros((total_tokens, total_tokens), dtype=dtype, device=device)
485
+ additive_mask.masked_fill_(block_mask, float("-inf"))
486
+ return additive_mask.view(1, 1, total_tokens, total_tokens)
487
+
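+ # Worked example (illustrative): cu_seqlens = tensor([0, 2, 5]) packs two documents of lengths 2
+ # and 3 into one 5-token sequence; the additive mask is 0 inside the 2x2 and 3x3 diagonal blocks
+ # and -inf elsewhere, so attention never crosses document boundaries.
+ # >>> mask = build_document_attention_mask(torch.tensor([0, 2, 5]), 5, torch.float32, torch.device("cpu"))
+ # >>> mask.shape
+ # torch.Size([1, 1, 5, 5])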
488
+
489
+
490
+
491
+ def ensure_document_attention_mask(
492
+ attention_mask: Optional[torch.Tensor],
493
+ cu_seqlens: Optional[torch.Tensor],
494
+ total_tokens: int,
495
+ dtype: torch.dtype,
496
+ device: torch.device,
497
+ ) -> Optional[torch.Tensor]:
498
+ if attention_mask is not None or cu_seqlens is None:
499
+ return attention_mask
500
+
501
+ return build_document_attention_mask(
502
+ cu_seqlens=cu_seqlens,
503
+ total_tokens=total_tokens,
504
+ dtype=dtype,
505
+ device=device,
506
+ )
507
+
508
+
509
  def flash_attention_document_mask_forward(
510
+ module: torch.nn.Module,
511
  q_lhd: torch.Tensor, # (L, H, D)
512
  k_lhd: torch.Tensor, # (L, H, D)
513
  v_lhd: torch.Tensor, # (L, H, D)
 
563
  v_lhd: torch.Tensor, # (L, H, D)
564
  dropout: float,
565
  scaling: float | None,
566
+ attention_mask: torch.Tensor | None = None,
567
+ cu_seqlens: torch.Tensor | None = None,
568
  ) -> torch.Tensor:
569
  """SDPA with block-diagonal masking for variable-length sequences."""
570
  L, H, D = q_lhd.shape
 
575
  V = v_lhd.permute(1, 0, 2).unsqueeze(0)
576
 
577
  # Build block-diagonal mask for variable-length sequences
578
+ attn_mask = attention_mask
579
+ if attn_mask is None:
580
+ attn_mask = build_document_attention_mask(
581
+ cu_seqlens=cu_seqlens,
582
+ total_tokens=L,
583
+ dtype=q_lhd.dtype,
584
+ device=q_lhd.device,
585
+ )
586
+
587
+ if attn_mask is not None and attn_mask.dtype != Q.dtype:
588
+ attn_mask = attn_mask.to(Q.dtype)
589
 
590
  Y = F.scaled_dot_product_attention(Q, K, V, attn_mask=attn_mask, dropout_p=dropout, scale=scaling)
591
  return Y.squeeze(0).permute(1, 0, 2) # Back to (L, H, D)
592
 
593
 
594
+ class IsaacVisionEmbeddings(HFSiglip2VisionEmbeddings):
595
+ """Adapter around SigLIP2 vision embeddings that consumes packed patch sequences."""
 
 
 
 
596
 
597
+ def __init__(self, config: IsaacVisionConfig):
598
+ super().__init__(config)
 
 
599
 
600
+ def forward(self, seq_patches: torch.Tensor, spatial_shapes: torch.Tensor) -> torch.Tensor:
601
+ packed_pixel_values, seq_lengths = self._pack_to_batch(seq_patches, spatial_shapes)
602
+ if packed_pixel_values is None:
603
+ return seq_patches.new_zeros((0, self.embed_dim))
604
 
605
+ embeddings = super().forward(packed_pixel_values, spatial_shapes)
606
+ return self._unpack_from_batch(embeddings, seq_lengths)
 
 
 
 
 
 
 
607
 
608
+ def _pack_to_batch(
609
+ self,
610
+ seq_patches: torch.Tensor,
611
+ spatial_shapes: torch.Tensor,
612
+ ) -> tuple[torch.Tensor | None, torch.Tensor]:
613
+ if seq_patches.ndim != 2:
614
+ raise ValueError("`seq_patches` is expected to be 2D (total_patches, patch_dim).")
615
+ if spatial_shapes.ndim != 2 or spatial_shapes.size(-1) != 2:
616
+ raise ValueError("`spatial_shapes` must have shape (num_images, 2) with (height_tokens, width_tokens).")
617
+
618
+ seq_lengths = spatial_shapes.long().prod(dim=-1)
619
+ total_patches = int(seq_lengths.sum().item())
620
+ if total_patches != seq_patches.size(0):
621
+ raise ValueError(
622
+ "Mismatch between packed patches and spatial shapes: got "
623
+ f"{seq_patches.size(0)} patches but spatial shapes imply {total_patches}."
624
+ )
 
 
 
 
 
 
 
625
 
626
+ batch_size = spatial_shapes.size(0)
627
+ if batch_size == 0:
628
+ return None, seq_lengths
629
 
630
+ max_length = int(seq_lengths.max().item())
631
+ patch_dim = seq_patches.size(-1)
632
+ device = seq_patches.device
633
 
634
+ packed_pixel_values = seq_patches.new_zeros((batch_size, max_length, patch_dim), device=device)
 
 
 
635
 
636
+ start = 0
637
+ for batch_idx, length in enumerate(seq_lengths.tolist()):
638
+ if length == 0:
639
+ continue
640
+ end = start + length
641
+ packed_pixel_values[batch_idx, :length] = seq_patches[start:end]
642
+ start = end
643
 
644
+ return packed_pixel_values, seq_lengths
645
 
646
+ def _unpack_from_batch(self, embeddings: torch.Tensor, seq_lengths: torch.Tensor) -> torch.Tensor:
647
+ output_chunks: list[torch.Tensor] = []
648
+ for batch_idx, length in enumerate(seq_lengths.tolist()):
649
+ if length == 0:
650
+ continue
651
+ output_chunks.append(embeddings[batch_idx, :length])
652
 
653
+ if not output_chunks:
654
+ return embeddings.new_zeros((0, embeddings.size(-1)))
 
 
 
 
 
 
 
 
 
 
 
655
 
656
+ return torch.cat(output_chunks, dim=0)
657
+
658
+
659
+ class IsaacVisionAttention(Siglip2Attention):
660
+ """Custom attention that supports variable-length sequences with flash attention."""
661
+
662
+ ATTENTION_KEY_MAP: dict[str, str] = {
663
+ "flash_attention_2": "isaac_flash_attention_2",
664
+ "flash_attention_3": "isaac_flash_attention_3",
665
+ "isaac_flash_attention_2": "isaac_flash_attention_2",
666
+ "isaac_flash_attention_3": "isaac_flash_attention_3",
667
+ "sdpa": "isaac_sdpa",
668
+ "isaac_sdpa": "isaac_sdpa",
669
+ "eager": "isaac_eager",
670
+ "isaac_eager": "isaac_eager",
671
+ }
672
+
673
+ def __init__(self, vision_config):
674
+ super().__init__(vision_config)
675
+ self.vision_config = vision_config
676
+ self._variable_length_metadata = None
677
+
678
+ def _variable_length_context(self, *, cu_seqlens=None, max_seqlen=None):
679
+ """Store packed-sequence metadata for the next forward call."""
680
+ self._variable_length_metadata = (cu_seqlens, max_seqlen)
681
+
682
+ def _consume_variable_length_metadata(self):
683
+ if self._variable_length_metadata is None:
684
+ return None, None
685
+ cu_seqlens, max_seqlen = self._variable_length_metadata
686
+ self._variable_length_metadata = None
687
+ return cu_seqlens, max_seqlen
688
+
689
+ def forward(self, hidden_states, attention_mask=None, **kwargs):
690
+ cu_seqlens = kwargs.pop("cu_seqlens", None)
691
+ max_seqlen = kwargs.pop("max_seqlen", None)
692
+ kwargs.pop("output_attentions", None)
693
+ kwargs.pop("output_hidden_states", None)
694
+ kwargs.pop("return_dict", None)
695
+ if kwargs:
696
+ unexpected = ', '.join(sorted(kwargs))
697
+ raise TypeError(f'Unexpected kwargs for IsaacVisionAttention.forward: {unexpected}')
698
+ cached_cu, cached_max = self._consume_variable_length_metadata()
699
+ if cu_seqlens is None:
700
+ cu_seqlens = cached_cu
701
+ if max_seqlen is None:
702
+ max_seqlen = cached_max
703
 
 
704
  # Expect packed sequences with batch_size == 1
705
  batch_size, L, _ = hidden_states.shape
706
  if batch_size != 1:
 
716
  k = self.k_proj(x).view(L, H, D)
717
  v = self.v_proj(x).view(L, H, D)
718
 
719
+ attn_impl = getattr(self.vision_config, "_attn_implementation", "flash_attention_3")
720
+
721
+ attn_mask = ensure_document_attention_mask(
722
+ attention_mask,
723
+ cu_seqlens,
724
+ L,
725
+ q.dtype,
726
+ q.device,
727
+ )
728
+
729
+ resolved_key = self.ATTENTION_KEY_MAP.get(attn_impl)
730
+ attention_fn = ALL_ATTENTION_FUNCTIONS.get(resolved_key) if resolved_key is not None else None
731
+ if attention_fn is None:
732
+ raise ValueError(f"Attention implementation {attn_impl} not found.")
733
+
734
+ query_states = q.transpose(0, 1).unsqueeze(0)
735
+ key_states = k.transpose(0, 1).unsqueeze(0)
736
+ value_states = v.transpose(0, 1).unsqueeze(0)
737
+
738
+ attention_kwargs: dict[str, Any] = {
739
+ "dropout": p_drop,
740
+ "scaling": self.scale,
741
+ "is_causal": False,
742
+ }
743
+ if cu_seqlens is not None:
744
+ attention_kwargs["cu_seq_lens_q"] = cu_seqlens
745
+ attention_kwargs["cu_seq_lens_k"] = cu_seqlens
746
+ if max_seqlen is not None:
747
+ attention_kwargs["max_length_q"] = max_seqlen
748
+ attention_kwargs["max_length_k"] = max_seqlen
749
+
750
+ attn_output, _ = attention_fn(
751
+ self,
752
+ query_states,
753
+ key_states,
754
+ value_states,
755
+ attn_mask,
756
+ **attention_kwargs,
757
+ )
758
+
759
+ y_lhd = attn_output.squeeze(0).permute(1, 0, 2).contiguous()
760
 
761
  # Merge heads and project
762
  y = self.out_proj(y_lhd.reshape(L, self.embed_dim))
763
  return y.unsqueeze(0), None # (1, L, E)
764
 
765
 
766
+ class IsaacVisionEncoderLayer(HFSiglip2EncoderLayer):
767
+ """Isaac vision encoder layer with variable-length attention."""
 
 
 
 
 
768
 
769
+ def __init__(self, vision_config: IsaacVisionConfig):
770
+ super().__init__(vision_config)
771
+ self.self_attn = IsaacVisionAttention(vision_config)
772
 
773
  def forward(
774
  self,
775
  hidden_states: torch.Tensor,
776
+ attention_mask: Optional[torch.Tensor] = None,
777
+ cu_seqlens: Optional[torch.Tensor] = None,
778
+ max_seqlen: Optional[int] = None,
779
+ output_attentions: bool = False,
780
+ output_hidden_states: Optional[bool] = None,
781
+ ):
782
+ if cu_seqlens is not None or max_seqlen is not None:
783
+ self.self_attn._variable_length_context(
784
+ cu_seqlens=cu_seqlens,
785
+ max_seqlen=max_seqlen,
786
+ )
787
 
788
+ attention_mask = ensure_document_attention_mask(
789
+ attention_mask,
790
+ cu_seqlens,
791
+ hidden_states.size(1),
792
+ hidden_states.dtype,
793
+ hidden_states.device,
794
  )
795
 
796
+ return super().forward(
797
+ hidden_states,
798
+ attention_mask=attention_mask,
799
+ output_attentions=output_attentions,
800
+ )
801
 
 
 
 
 
802
 
803
+ class IsaacVisionEncoder(HFSiglip2Encoder):
804
+ """Encoder using Isaac encoder layers with variable-length attention support."""
805
 
806
+ def __init__(self, config: IsaacVisionConfig):
807
+ super().__init__(config)
808
+ self.layers = nn.ModuleList([IsaacVisionEncoderLayer(config) for _ in range(config.num_hidden_layers)])
809
 
810
+ def __variable_length_context(self, cu_seqlens, max_seqlen) -> None:
811
+ if cu_seqlens is None and max_seqlen is None:
812
+ return
813
 
814
+ for layer in self.layers:
815
+ if isinstance(layer, IsaacVisionEncoderLayer):
816
+ layer.self_attn._variable_length_context(
817
+ cu_seqlens=cu_seqlens,
818
+ max_seqlen=max_seqlen,
819
+ )
820
 
821
+ @can_return_tuple
822
  def forward(
823
  self,
824
  inputs_embeds,
825
+ attention_mask: Optional[torch.Tensor] = None,
826
+ cu_seqlens: Optional[torch.Tensor] = None,
827
+ max_seqlen: Optional[int] = None,
828
+ output_attentions: Optional[bool] = None,
829
+ output_hidden_states: Optional[bool] = None,
830
+ return_dict: Optional[bool] = None,
831
  ):
832
+ self.__variable_length_context(cu_seqlens, max_seqlen)
833
 
834
+ attention_mask = ensure_document_attention_mask(
835
+ attention_mask,
836
+ cu_seqlens,
837
+ inputs_embeds.size(1),
838
+ inputs_embeds.dtype,
839
+ inputs_embeds.device,
840
+ )
841
 
842
+ return super().forward(
843
+ inputs_embeds,
844
+ attention_mask=attention_mask,
845
+ output_attentions=output_attentions,
846
+ #output_hidden_states=output_hidden_states,
847
+ #return_dict=return_dict,
848
+ )
849
 
 
 
 
 
 
850
 
851
+ def _isaac_flash_attention_forward(
852
+ module: nn.Module,
853
+ query: torch.Tensor,
854
+ key: torch.Tensor,
855
+ value: torch.Tensor,
856
+ attention_mask: Optional[torch.Tensor],
857
+ dropout: float = 0.0,
858
+ scaling: Optional[float] = None,
859
+ is_causal: bool = False,
860
+ **kwargs,
861
+ ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
862
+ base_fn = _ORIGINAL_ATTENTION_FUNCTIONS.get("flash_attention_2")
863
+ if not isinstance(module, IsaacVisionAttention) or base_fn is None:
864
+ if base_fn is None:
865
+ raise ValueError("Base flash attention function unavailable for fallback.")
866
+ return base_fn(
867
+ module,
868
+ query,
869
+ key,
870
+ value,
871
+ attention_mask,
872
+ dropout=dropout,
873
+ scaling=scaling,
874
+ is_causal=is_causal,
875
+ **kwargs,
876
+ )
877
+
878
+ if query.dim() != 4 or query.size(0) != 1:
879
+ raise ValueError("IsaacVisionAttention expects packed sequences with batch size 1 when using packed attention.")
880
+
881
+ _, num_heads, seq_len, head_dim = query.shape
882
+ q_lhd = query.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
883
+ k_lhd = key.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
884
+ v_lhd = value.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
885
+
886
+ cum_seq_q = kwargs.get("cu_seq_lens_q")
887
+ cum_seq_k = kwargs.get("cu_seq_lens_k", cum_seq_q)
888
+ max_seqlen = kwargs.get("max_length_q")
889
+
890
+ effective_dropout = dropout if dropout is not None else (module.dropout if module.training else 0.0)
891
+ effective_scaling = module.scale if scaling is None else scaling
892
+
893
+ attn_mask = attention_mask
894
+ if attn_mask is None:
895
+ attn_mask = build_document_attention_mask(
896
+ cu_seqlens=cum_seq_q,
897
+ total_tokens=seq_len,
898
+ dtype=q_lhd.dtype,
899
+ device=q_lhd.device,
900
+ )
901
+
902
+ attn_output_lhd, attn_weights = flash_attention_document_mask_forward(
903
+ module,
904
+ q_lhd,
905
+ k_lhd,
906
+ v_lhd,
907
+ attention_mask=attn_mask,
908
+ dropout=effective_dropout,
909
+ scaling=effective_scaling,
910
+ cum_seq_q=cum_seq_q,
911
+ cum_seq_k=cum_seq_k,
912
+ max_seqlen=max_seqlen,
913
+ is_causal=is_causal,
914
+ )
915
+
916
+ attn_output = attn_output_lhd.permute(1, 0, 2).unsqueeze(0)
917
+ return attn_output, attn_weights
918
+
919
+
920
+ def _isaac_sdpa_forward(
921
+ module: nn.Module,
922
+ query: torch.Tensor,
923
+ key: torch.Tensor,
924
+ value: torch.Tensor,
925
+ attention_mask: Optional[torch.Tensor],
926
+ dropout: float = 0.0,
927
+ scaling: Optional[float] = None,
928
+ is_causal: bool = False,
929
+ **kwargs,
930
+ ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
931
+ base_fn = _ORIGINAL_ATTENTION_FUNCTIONS.get("sdpa")
932
+ if not isinstance(module, IsaacVisionAttention) or base_fn is None:
933
+ if base_fn is None:
934
+ raise ValueError("Base SDPA function unavailable for fallback.")
935
+ return base_fn(
936
+ module,
937
+ query,
938
+ key,
939
+ value,
940
+ attention_mask,
941
+ dropout=dropout,
942
+ scaling=scaling,
943
+ is_causal=is_causal,
944
+ **kwargs,
945
+ )
946
+
947
+ if query.dim() != 4 or query.size(0) != 1:
948
+ raise ValueError("IsaacVisionAttention expects packed sequences with batch size 1 when using packed attention.")
949
+
950
+ _, num_heads, seq_len, head_dim = query.shape
951
+ q_lhd = query.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
952
+ k_lhd = key.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
953
+ v_lhd = value.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
954
+
955
+ cum_seq = kwargs.get("cu_seq_lens_q")
956
+ effective_dropout = dropout if dropout is not None else (module.dropout if module.training else 0.0)
957
+ effective_scaling = module.scale if scaling is None else scaling
958
+
959
+ attn_mask = attention_mask
960
+ if attn_mask is None:
961
+ attn_mask = build_document_attention_mask(
962
+ cu_seqlens=cum_seq,
963
+ total_tokens=seq_len,
964
+ dtype=q_lhd.dtype,
965
+ device=q_lhd.device,
966
+ )
967
+
968
+ attn_output_lhd = sdpa_document_mask_forward(
969
+ q_lhd,
970
+ k_lhd,
971
+ v_lhd,
972
+ dropout=effective_dropout,
973
+ scaling=effective_scaling,
974
+ attention_mask=attn_mask,
975
+ cu_seqlens=cum_seq,
976
+ )
977
 
978
+ attn_output = attn_output_lhd.permute(1, 0, 2).unsqueeze(0)
979
+ return attn_output, None
980
 
981
+
982
+ def _isaac_eager_forward(
983
+ module: nn.Module,
984
+ query: torch.Tensor,
985
+ key: torch.Tensor,
986
+ value: torch.Tensor,
987
+ attention_mask: Optional[torch.Tensor],
988
+ dropout: float = 0.0,
989
+ scaling: Optional[float] = None,
990
+ is_causal: bool = False,
991
+ **kwargs,
992
+ ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
993
+ base_fn = _ORIGINAL_ATTENTION_FUNCTIONS.get("eager")
994
+ if not isinstance(module, IsaacVisionAttention) or base_fn is None:
995
+ if base_fn is None:
996
+ raise ValueError("Base eager attention function unavailable for fallback.")
997
+ return base_fn(
998
+ module,
999
+ query,
1000
+ key,
1001
+ value,
1002
+ attention_mask,
1003
+ dropout=dropout,
1004
+ scaling=scaling,
1005
+ is_causal=is_causal,
1006
+ **kwargs,
1007
+ )
1008
+
1009
+ if query.dim() != 4 or query.size(0) != 1:
1010
+ raise ValueError("IsaacVisionAttention expects packed sequences with batch size 1 when using packed attention.")
1011
+
1012
+ _, num_heads, seq_len, head_dim = query.shape
1013
+ q_lhd = query.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
1014
+ k_lhd = key.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
1015
+ v_lhd = value.transpose(1, 2).reshape(seq_len, num_heads, head_dim)
1016
+
1017
+ effective_scaling = module.scale if scaling is None else scaling
1018
+ # Score per head over the sequence axis: (H, L, D) @ (H, D, L) -> (H, L, L)
+ attn_weights = torch.matmul(q_lhd.permute(1, 0, 2), k_lhd.permute(1, 2, 0)) * effective_scaling
1019
+
1020
+ if attention_mask is not None:
1021
+ mask = attention_mask
1022
+ if mask.dim() == 4:
1023
+ mask = mask.squeeze(0).squeeze(0)
1024
+ attn_weights = attn_weights + mask
1025
+
1026
+ attn_weights = torch.softmax(attn_weights, dim=-1)
1027
+ if dropout and module.training:
1028
+ attn_weights = F.dropout(attn_weights, p=dropout, training=True)
1029
+
1030
+ attn_output_lhd = torch.matmul(attn_weights, v_lhd.permute(1, 0, 2)).permute(1, 0, 2) # back to (L, H, D)
1031
+ attn_output = attn_output_lhd.permute(1, 0, 2).unsqueeze(0)
1032
+ return attn_output, attn_weights
1033
+
1034
+
1035
+ ALL_ATTENTION_FUNCTIONS.register("isaac_flash_attention_2", _isaac_flash_attention_forward)
1036
+ ALL_ATTENTION_FUNCTIONS.register("isaac_flash_attention_3", _isaac_flash_attention_forward)
1037
+ ALL_ATTENTION_FUNCTIONS.register("isaac_sdpa", _isaac_sdpa_forward)
1038
+ ALL_ATTENTION_FUNCTIONS.register("isaac_eager", _isaac_eager_forward)
1039
 
1040
 
1041
  def create_pixel_shuffle_index_map(
 
1065
  if device is None:
1066
  device = seq_sizes.device
1067
 
1068
+ scale_factor = int(scale_factor)
1069
+ if scale_factor < 2:
1070
  raise ValueError("`scale_factor` must be ≥ 2")
1071
 
1072
+ # Safety: all spatial dims must be divisible by the scale factor
1073
  # Cannot run under torch compile fullgraph mode hence
1074
+ if not is_torchdynamo_compiling():
1075
+ if not (
1076
+ (token_grids[:, 0] % scale_factor == 0).all() and (token_grids[:, 1] % scale_factor == 0).all()
1077
+ ):
1078
  raise AssertionError(
1079
+ "Every (H,W) in `token_grids` must be divisible by "
1080
+ f"scale_factor={scale_factor}, got {token_grids.tolist()}"
1081
  )
1082
 
1083
  gather_chunks: list[torch.Tensor] = []
 
1089
  grid = grid.view(h, w) # (H, W)
1090
 
1091
  # -------- identical ordering to the fixed-resolution routine --------
1092
+ # Step 1: split width into blocks of scale_factor
1093
+ grid = grid.view(h, w // scale_factor, scale_factor) # (H, W/scale_factor, scale_factor)
1094
+ # Step 2: now split height into blocks of scale_factor
1095
+ grid = grid.view(h // scale_factor, scale_factor, w // scale_factor, scale_factor)
1096
+ # (H/scale_factor, scale_factor, W/scale_factor, scale_factor)
1097
+ # Step 3: final permutation to (H/scale_factor, W/scale_factor, scale_factor, scale_factor)
1098
+ grid = grid.permute(0, 2, 1, 3).contiguous() # (H/scale_factor, W/scale_factor, scale_factor, scale_factor)
1099
+ # Step 4: each (scale_factor, scale_factor) block forms one output token
1100
+ gather_chunks.append(grid.reshape(-1, scale_factor * scale_factor))
1101
+ # (H*W / scale_factor**2, scale_factor**2)
1102
 
1103
  tok_offset += seq_len
1104
 
1105
  # Concatenate over all images in the packed batch
1106
+ gather_idx = torch.cat(gather_chunks, dim=0) # (Σ_i HᵢWᵢ/scale_factor**2, scale_factor**2)
1107
  return gather_idx
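+ # Worked example (illustrative): a single image with a 4x4 token grid and scale_factor=2 produces a
+ # gather_idx of shape (4, 4); each row lists the four flat patch indices of one 2x2 spatial block
+ # that get concatenated channel-wise into a single shuffled token, e.g. row 0 -> [0, 1, 4, 5].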
1108
 
1109
 
 
1142
  x_ = x # (seq, embed)
1143
 
1144
  embed_dim = x_.size(-1)
1145
+ scale_factor = int(scale_factor)
1146
 
1147
  # Calculate seq_sizes from token_grids
1148
  seq_sizes = torch.prod(token_grids, dim=-1)
 
1151
  gather_idx = create_pixel_shuffle_index_map(
1152
  seq_sizes=seq_sizes,
1153
  token_grids=token_grids,
1154
+ scale_factor=scale_factor,
1155
  device=x_.device,
1156
+ ) # (new_seq, scale_factor**2)
1157
 
1158
+ # Gather → (new_seq, scale_factor**2, embed_dim)
1159
  gathered = x_[gather_idx] # fancy indexing keeps gradient
1160
 
1161
+ # Merge the scale_factor**2 group dimension into channels to finish the shuffle
1162
+ out = gathered.reshape(gathered.size(0), embed_dim * scale_factor * scale_factor)
1163
 
1164
  # Restore batch dimension if needed
1165
  if keep_batch_dim:
 
1167
  return out
1168
 
1169
 
1170
+ class IsaacVisionTransformer(nn.Module):
1171
+ def __init__(self, config: IsaacVisionConfig):
1172
  super().__init__()
1173
  self.config = config
1174
+ self.embeddings = IsaacVisionEmbeddings(config)
1175
+ self.encoder = IsaacVisionEncoder(config)
1176
  self.post_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
1177
  self.pixel_shuffle_scale_factor = config.pixel_shuffle_scale_factor
1178
 
 
1181
  seq_sizes = torch.prod(token_grids, dim=-1)
1182
 
1183
  # Get embeddings from packed sequence
1184
+ hidden_states = self.embeddings(seq_patches, token_grids)
1185
 
1186
  # Add a pseudo batch dimension for the encoder
1187
  hidden_states = hidden_states.unsqueeze(0)
1188
 
1189
  # Generate cumulative sequence lengths for variable-length attention
1190
+ cu_seqlens = torch.zeros(seq_sizes.size(0) + 1, dtype=torch.int32, device=hidden_states.device)
1191
+ cu_seqlens[1:] = seq_sizes.cumsum(0)
1192
+ max_seqlen = int(seq_sizes.max().item()) if seq_sizes.numel() > 0 else 0
1193
 
1194
  # Pass through encoder with variable-length attention parameters
1195
+ encoder_outputs = self.encoder(
1196
  inputs_embeds=hidden_states,
1197
  cu_seqlens=cu_seqlens,
1198
  max_seqlen=max_seqlen,
1199
+ return_dict=True,
1200
  )
1201
+ hidden_states = encoder_outputs.last_hidden_state
1202
 
1203
  # Apply final layer normalization
1204
  hidden_states = self.post_layernorm(hidden_states)
 
1216
  return hidden_states
1217
 
1218
 
1219
+ def get_scaled_image_size(
1220
+ scale: float,
1221
+ original_size: int,
1222
+ patch_size: int,
1223
+ pixel_shuffle_scale: int,
1224
+ ) -> int:
1225
+ scaled_size = scale * original_size
1226
+ divisor = patch_size * pixel_shuffle_scale
1227
+ scaled_size = math.ceil(scaled_size / divisor) * divisor
1228
+ scaled_size = max(divisor, scaled_size)
1229
+ return int(scaled_size)
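+ # Worked example (illustrative): scale=0.5, original_size=300, patch_size=16, pixel_shuffle_scale=2
+ # gives divisor = 32 and 0.5 * 300 = 150, which rounds up to the next multiple of 32:
+ # >>> get_scaled_image_size(0.5, 300, 16, 2)
+ # 160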
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1230
 
1231
 
1232
  def get_image_size_for_max_num_patches(
 
1261
  and respect both the maximum and optional minimum patch-count constraints.
1262
  """
1263
 
 
 
 
 
 
 
 
1264
  # Ensure divisibility
1265
  divisor = patch_size * pixel_shuffle_scale
1266
  adjusted_height = math.ceil(image_height / divisor) * divisor
 
1306
  return target_height, target_width
1307
 
1308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1309
  def patchify_vision(image: torch.Tensor, patch_size: int) -> torch.Tensor:
1310
  r"""Convert normalized images into flattened ViT-style patches.
1311
 
 
1331
  return patches
1332
 
1333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1334
  class IsaacConfig(Qwen3Config):
1335
  """Configuration class for Isaac multimodal model."""
1336
 
1337
  model_type = "isaac"
1338
+ sub_configs = {"vision_config": IsaacVisionConfig, "text_config": Qwen3Config}
1339
+ image_processor_type = "IsaacImageProcessor"
1340
 
1341
  def __init__(
1342
  self,
1343
+ vision_config: IsaacVisionConfig | None = None,
1344
+ text_config: Qwen3Config | dict | None = None,
1345
+ vision_rescale_factor: float = 1/255,
 
 
1346
  max_sequence_length: int = 16384,
1347
  vision_token: str = "<image>",
 
1348
  **kwargs,
1349
  ):
1350
+ self._rope_scaling: dict[str, Any] | None = None
1351
+ resolved_text_config = kwargs.pop("text_config", text_config)
1352
+ if isinstance(resolved_text_config, Qwen3Config):
1353
+ text_config_kwargs = copy.deepcopy(resolved_text_config.to_dict())
1354
+ elif isinstance(resolved_text_config, dict):
1355
+ text_config_kwargs = copy.deepcopy(resolved_text_config)
1356
+ elif resolved_text_config is None:
1357
+ text_config_kwargs = {}
1358
+ else:
1359
+ raise TypeError("`text_config` must be a mapping or `Qwen3Config` instance when provided.")
1360
 
1361
+ text_config_kwargs.update(kwargs)
1362
+
1363
+ super().__init__(**text_config_kwargs)
1364
+ self.text_config = Qwen3Config(**text_config_kwargs)
1365
+ if self._rope_scaling is None:
1366
+ self._rope_scaling = getattr(self.text_config, "rope_scaling", None)
1367
+ else:
1368
+ self.text_config.rope_scaling = self._rope_scaling
1369
+
1370
+ # Handle vision config - either dict or IsaacVisionConfig instance
1371
  if isinstance(vision_config, dict):
1372
  self.vision_config = self.sub_configs["vision_config"](**vision_config)
1373
+ elif isinstance(vision_config, IsaacVisionConfig):
1374
+ self.vision_config = vision_config
1375
  elif vision_config is None:
1376
  self.vision_config = self.sub_configs["vision_config"]()
 
 
1377
 
1378
+ # Vision normalization parameters
1379
+ self.vision_rescale_factor = float(vision_rescale_factor)
 
 
 
1380
 
1381
  # Processing parameters
1382
  self.max_sequence_length = max_sequence_length
1383
  self.vision_token = vision_token
1384
+
1385
+ def get_text_config(self, *_, **kwargs) -> Qwen3Config:
1386
+ # Accept optional decoder/encoder flags to align with HF composite configs
1387
+ kwargs.pop("decoder", None)
1388
+ kwargs.pop("encoder", None)
1389
+ return self.text_config
1390
+
1391
+ @property
1392
+ def rope_scaling(self):
1393
+ if hasattr(self, "text_config") and self.text_config is not None:
1394
+ return getattr(self.text_config, "rope_scaling", None)
1395
+ return self._rope_scaling
1396
+
1397
+ @rope_scaling.setter
1398
+ def rope_scaling(self, value):
1399
+ self._rope_scaling = value
1400
+ if hasattr(self, "text_config") and self.text_config is not None:
1401
+ self.text_config.rope_scaling = value
1402
+
1403
+ @property
1404
+ def vision_attn_implementation(self) -> str | None:
1405
+
1406
+ value = getattr(self.vision_config, "_attn_implementation", None)
1407
+ if value is None:
1408
+ value = getattr(self.vision_config, "attn_implementation", None)
1409
+ return value
1410
+
1411
+ @vision_attn_implementation.setter
1412
+ def vision_attn_implementation(self, value: str | None) -> None:
1413
+ self.vision_config._attn_implementation = value
1414
+ if value is not None:
1415
+ self.vision_config.attn_implementation = value
1416
+ elif hasattr(self.vision_config, "attn_implementation"):
1417
+ delattr(self.vision_config, "attn_implementation")
1418
 
1419
 
1420
  # ============================================================================
 
1466
 
1467
 
1468
  class IsaacProcessor(ProcessorMixin):
1469
+ attributes = ["image_processor", "tokenizer"]
1470
+ image_processor_class = ("IsaacImageProcessorFast",)
1471
  tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
1472
+ valid_processor_kwargs = IsaacProcessorKwargs
1473
 
1474
  def __init__(
1475
  self,
1476
+ image_processor: IsaacImageProcessorFast | None = None,
1477
+ tokenizer: Qwen2Tokenizer | None = None,
1478
+ *,
1479
+ vision_token: str = "<image>",
1480
+ max_sequence_length: int = 16384,
1481
+ rescale_factor: float | None = None,
1482
+ config: IsaacConfig | dict | None = None,
1483
+ ) -> None:
1484
+ if tokenizer is None:
1485
+ raise ValueError("`tokenizer` must be provided to initialize IsaacProcessor.")
1486
 
1487
  if isinstance(config, dict):
1488
  config = IsaacConfig(**config)
 
1489
 
1490
+ if config is not None:
1491
+ max_sequence_length = config.max_sequence_length
1492
+ vision_token = config.vision_token
1493
+ rescale_factor = config.vision_rescale_factor
1494
 
1495
+ resolved_rescale_factor = (
1496
+ float(rescale_factor) if rescale_factor is not None else float(1/255)
1497
+ )
1498
 
1499
+ if config is not None:
1500
+ config.vision_rescale_factor = resolved_rescale_factor
 
 
 
1501
 
1502
+ self.image_processor = image_processor
1503
+
1504
+ super().__init__(image_processor, tokenizer)
1505
+ self.current_processor = self.image_processor
1506
+ self.config = config
1507
+
1508
+ # Mirror tokenizer chat template so ProcessorMixin.apply_chat_template works.
1509
+ self.chat_template = getattr(self.tokenizer, "chat_template", None)
1510
+
1511
+ self.vision_token = vision_token
1512
+ self.max_sequence_length = max_sequence_length
1513
 
1514
  def build_event_stream_simple(
1515
  self,
 
1527
  for current_time, part in enumerate(parts):
1528
  if part == self.vision_token:
1529
  # Replace vision token with image event
1530
+ if images is None or image_idx >= len(images):
1531
+ raise ValueError("Encountered vision token without a corresponding image.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1532
 
1533
+ features = self.image_processor(
1534
+ images=images[image_idx],
1535
+ return_tensors=TensorType.PYTORCH,
 
 
 
 
 
 
 
 
 
 
 
 
 
1536
  )
1537
 
1538
+ patches = features["patches"][0] # (H_tokens, W_tokens, embed)
1539
+ virtual_dims = features["virtual_pixel_size"][0].tolist()
1540
+ real_dims = features["real_pixel_size"][0].tolist()
1541
+
1542
+ vision_event = Event(
1543
+ data=patches.reshape(-1, patches.shape[-1]),
1544
+ type=VisionType.image,
1545
+ time=(current_time, current_time),
1546
+ dims_virtual=virtual_dims,
1547
+ dims_real=real_dims,
1548
+ idx_range=(0, math.prod(virtual_dims)),
1549
  )
1550
+ events.append(vision_event)
1551
+ image_idx += 1
1552
+ elif part: # Non-empty text part
1553
+ # tokens = self.text_processor.tokenize(part, add_special_tokens=False)
1554
+ text_event = create_text_event(self.tokenizer, part, time=current_time)
1555
+ events.append(text_event)
 
1556
 
1557
  # Create stream without scheduling (events already in order)
1558
  return create_stream(events, priority=[TextType.text, VisionType.image], schedule=True)
 
1652
 
1653
 
1654
  class IsaacRotaryEmbedding(nn.Module):
1655
+ EXTRA_ROPE_KEYS = {"mrope_section", "mrope_interleaved"}
1656
+
1657
  def __init__(self, config: IsaacConfig, device=None):
1658
  super().__init__()
1659
 
1660
+ rope_source_cfg = config.get_text_config() if hasattr(config, "get_text_config") else config
1661
+ rope_scaling = getattr(rope_source_cfg, "rope_scaling", None) or {}
1662
+
1663
+ sanitized_scaling = {k: v for k, v in rope_scaling.items() if k not in self.EXTRA_ROPE_KEYS}
1664
+ config_for_rope = copy.copy(rope_source_cfg)
1665
+ config_for_rope.rope_scaling = sanitized_scaling if sanitized_scaling else None
1666
 
1667
+ init_device = device if device is not None and getattr(device, "type", None) != "meta" else None
1668
+ self._qwen_rotary = Qwen2_5_VLRotaryEmbedding(config_for_rope, device=init_device)
1669
 
1670
+ rotary_half_dim = self._qwen_rotary.inv_freq.shape[0]
1671
+ self.mrope_section = self._resolve_mrope_section(rope_scaling.get("mrope_section"), rotary_half_dim)
1672
+ self.hidden_size = getattr(rope_source_cfg, "hidden_size", None) or config.hidden_size
1673
 
1674
+ @staticmethod
1675
+ def _resolve_mrope_section(section: list[int] | None, rotary_half_dim: int) -> list[int]:
1676
+ if section is None:
1677
+ weights = (2, 1, 1)
1678
+ base = [rotary_half_dim * w // sum(weights) for w in weights]
1679
+ base[0] += rotary_half_dim - sum(base)
1680
+ return base
1681
+
1682
+ section = [int(v) for v in section]
1683
+ if len(section) != 3:
1684
+ raise ValueError("`mrope_section` must contain exactly three elements (temporal, height, width)")
1685
+ if sum(section) != rotary_half_dim:
1686
+ raise ValueError(
1687
+ f"`mrope_section` must sum to the rotary half-dimension ({rotary_half_dim}). Received {section}."
1688
+ )
1689
+ return section
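+ # Worked example (illustrative): with a rotary half-dimension of 64 and no `mrope_section` entry in
+ # rope_scaling, the default 2:1:1 split assigns [32, 16, 16] to the temporal/height/width axes.
+ # >>> IsaacRotaryEmbedding._resolve_mrope_section(None, 64)
+ # [32, 16, 16]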
1690
 
1691
+ def _combine_axes(self, tensor: torch.Tensor) -> torch.Tensor:
1692
+ split_sections = tuple(self.mrope_section * 2)
1693
+ chunks = tensor.split(split_sections, dim=-1)
1694
+ return torch.cat([chunk[i % 3] for i, chunk in enumerate(chunks)], dim=-1)
1695
+
1696
+ @property
1697
+ def inv_freq(self) -> torch.Tensor:
1698
+ return self._qwen_rotary.inv_freq
1699
+
1700
+ def forward(
1701
+ self,
1702
+ position_ids: torch.Tensor,
1703
+ modality_tensor: torch.Tensor,
1704
+ hidden_states: torch.Tensor | None = None,
1705
+ ) -> tuple[torch.Tensor, torch.Tensor]:
1706
+ if position_ids.ndim != 3 or position_ids.size(-1) != 3:
1707
+ raise ValueError("`position_ids` must have shape (batch, seq_len, 3) for MRoPE")
1708
+ if modality_tensor.shape != position_ids.shape[:2]:
1709
+ raise ValueError("`modality_tensor` must align with the first two dims of `position_ids`")
1710
+
1711
+ if hidden_states is None:
1712
+ batch, seq_len, _ = position_ids.shape
1713
+ hidden_states = torch.zeros(
1714
+ batch,
1715
+ seq_len,
1716
+ self.hidden_size,
1717
+ dtype=torch.float32,
1718
+ device=position_ids.device,
1719
+ )
1720
 
 
1721
  with torch.no_grad():
1722
+ pos = position_ids.clone()
1723
+ not_spatial = modality_tensor != VisionType.image.value
1724
+ if not_spatial.any():
1725
+ data_1d = pos[not_spatial][..., 0].unsqueeze(-1)
1726
+ pos[not_spatial] = data_1d.expand(-1, pos.shape[-1])
1727
+
1728
+ pos_axes = pos.permute(2, 0, 1).contiguous()
1729
+
1730
+ cos_axes, sin_axes = self._qwen_rotary(hidden_states, pos_axes)
1731
+
1732
+ cos_axes = cos_axes.to(hidden_states.dtype)
1733
+ sin_axes = sin_axes.to(hidden_states.dtype)
1734
+
1735
+ cos_combined = self._combine_axes(cos_axes)
1736
+ sin_combined = self._combine_axes(sin_axes)
1737
 
1738
+ return cos_combined, sin_combined
1739
 
1740
+ class IsaacModel(Qwen3PreTrainedModel):
1741
+ supports_gradient_checkpointing = True
1742
 
 
1743
  def __init__(self, config: IsaacConfig):
1744
+ Qwen3PreTrainedModel.__init__(self, config)
1745
+
1746
+ text_cfg_source = getattr(config, "get_text_config", lambda: config)()
1747
+ text_cfg = copy.deepcopy(text_cfg_source)
1748
+ text_cfg._attn_implementation = config._attn_implementation
1749
+ self.text_model = AutoModel.from_config(text_cfg)
1750
+ # Ensure downstream callers observe the composed config
1751
+ self.text_model.config = config
1752
+
1753
  self.rotary_emb = IsaacRotaryEmbedding(config, device=self.device)
1754
 
1755
+ if config.vision_config is None:
 
 
 
 
 
 
 
1756
  raise ValueError("IsaacConfig should always have vision_config")
1757
 
1758
+ hidden_dim = config.vision_config.hidden_size * (config.vision_config.pixel_shuffle_scale_factor**2)
1759
  self.vision_embedding = nn.Sequential(
1760
+ IsaacVisionTransformer(config.vision_config),
1761
  nn.Linear(
1762
  hidden_dim,
1763
  4 * hidden_dim,
 
1773
  VisionType: self.embed_vision,
1774
  }
1775
 
1776
+ def get_input_embeddings(self) -> nn.Module:
1777
+ return self.text_model.get_input_embeddings()
1778
+
1779
+ def set_input_embeddings(self, value: nn.Module) -> None:
1780
+ self.text_model.set_input_embeddings(value)
1781
+
1782
+ @property
1783
+ def embed_tokens(self) -> nn.Module:
1784
+ return self.text_model.embed_tokens
1785
+
1786
+ @embed_tokens.setter
1787
+ def embed_tokens(self, value: nn.Module) -> None:
1788
+ self.text_model.embed_tokens = value
1789
+
1790
+ @property
1791
+ def layers(self) -> nn.ModuleList:
1792
+ return self.text_model.layers
1793
+
1794
+ @property
1795
+ def norm(self) -> nn.Module:
1796
+ return self.text_model.norm
1797
+
1798
+ def _set_gradient_checkpointing(self, enable: bool = True, gradient_checkpointing_func=None):
1799
+ self.text_model._set_gradient_checkpointing(
1800
+ enable=enable, gradient_checkpointing_func=gradient_checkpointing_func
1801
+ )
1802
+
1803
  def embed_text_tokens(self, token_ids: torch.Tensor) -> torch.Tensor:
1804
  """Embed text tokens, squeezing singleton dimensions."""
1805
  # Text events are shaped as (..., 1); squeeze the singleton index dim
1806
+ h = self.text_model.embed_tokens(token_ids)
1807
  if h.dim() >= 2 and h.size(-2) == 1:
1808
  h = h[..., 0, :]
1809
  return h
 
1885
  elif input_ids is not None and inputs_embeds is not None:
1886
  raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
1887
  elif input_ids is not None:
1888
+ inputs_embeds = self.text_model.embed_tokens(input_ids)
1889
  # Create text modality tensor if not provided
1890
  if modality_tensor is None:
1891
  batch_size, seq_length = input_ids.shape
 
1903
  position_ids = compute_position_ids_input_ids(input_ids)
1904
 
1905
  # Compute MRoPE position embeddings if we have custom rotary_emb
1906
+ cos, sin = self.rotary_emb(
1907
+ position_ids,
1908
+ modality_tensor,
1909
+ hidden_states=inputs_embeds,
1910
+ )
1911
  cos = cos.to(inputs_embeds.dtype)
1912
  sin = sin.to(inputs_embeds.dtype)
1913
 
 
1920
  # Initialize hidden states
1921
  hidden_states = inputs_embeds
1922
 
1923
+ for decoder_layer in self.text_model.layers:
1924
  layer_outputs = decoder_layer(
1925
  hidden_states,
1926
  attention_mask=attention_mask,
 
1935
  hidden_states = layer_outputs[0] if isinstance(layer_outputs, tuple) else layer_outputs
1936
 
1937
  # Final layer norm
1938
+ hidden_states = self.text_model.norm(hidden_states)
1939
 
1940
  return BaseModelOutputWithPast(
1941
  last_hidden_state=hidden_states,
 
2099
  config_class = IsaacConfig
2100
 
2101
  def __init__(self, config: IsaacConfig):
2102
+ super().__init__(config)
2103
  self.model = IsaacModel(config) # Use our custom model
2104
  self.vocab_size = config.vocab_size
2105
  self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
2106
  # Tracks rotary position offsets computed during a full forward pass so decode steps can reuse them.
2107
  self.rope_deltas = None
2108
 
 
 
2109
  def get_rope_index(
2110
  self,
2111
  input_ids: torch.Tensor | None,
 
2261
  return True
2262
 
2263
 
2264
+ AutoImageProcessor.register(
2265
+ IsaacConfig,
2266
+ fast_image_processor_class=IsaacImageProcessorFast,
2267
+ exist_ok=True,
2268
+ )
2269
+
2270
+
2271
  __all__ = [
2272
  "IsaacConfig",
2273
  "IsaacModel",
2274
  "IsaacForConditionalGeneration",
2275
+ "IsaacImageProcessorFast",
2276
  "IsaacProcessor",
2277
  ]
2278
+
2279
+
2280
+ def _compute_residual_p_frames(frames: torch.Tensor, is_p_frame: list[bool]) -> torch.Tensor:
2281
+ """Compute residuals for P-frames to stay in sync with the training pipeline."""
2282
+ if not any(is_p_frame):
2283
+ return frames
2284
+
2285
+ frame_indices = torch.arange(len(is_p_frame), device=frames.device)
2286
+ i_frame_mask = torch.tensor([not flag for flag in is_p_frame], device=frames.device)
2287
+ last_i_indices = torch.cummax((i_frame_mask * (1 + frame_indices)), dim=0).values.long() - 1
2288
+ p_indices = frame_indices[torch.tensor(is_p_frame, device=frames.device)]
2289
+ frames[p_indices] = frames[p_indices] - frames[last_i_indices[p_indices]]
2290
+ return frames
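A small sanity-check sketch of the P-frame residual logic above; the helper is restated locally (mirroring the behaviour shown in this diff) so the snippet runs on its own. I-frames stay absolute and each P-frame becomes a delta against the most recent preceding I-frame.

import torch

def residual_p_frames(frames: torch.Tensor, is_p_frame: list[bool]) -> torch.Tensor:
    # Restatement of _compute_residual_p_frames from the diff above, for illustration only.
    if not any(is_p_frame):
        return frames
    idx = torch.arange(len(is_p_frame), device=frames.device)
    i_mask = torch.tensor([not f for f in is_p_frame], device=frames.device)
    last_i = torch.cummax(i_mask * (1 + idx), dim=0).values.long() - 1  # index of last I-frame so far
    p_idx = idx[torch.tensor(is_p_frame, device=frames.device)]
    frames[p_idx] = frames[p_idx] - frames[last_i[p_idx]]
    return frames

frames = torch.tensor([[10.0], [12.0], [15.0], [20.0]])  # four 1-pixel "frames"
out = residual_p_frames(frames.clone(), [False, True, True, False])
print(out.squeeze(-1).tolist())  # [10.0, 2.0, 5.0, 20.0]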
processor_config.json CHANGED
@@ -2,208 +2,46 @@
2
  "auto_map": {
3
  "AutoProcessor": "modular_isaac.IsaacProcessor"
4
  },
5
- "config": {
6
- "_name_or_path": "",
7
- "add_cross_attention": false,
8
- "architectures": [
9
- "IsaacForConditionalGeneration"
10
- ],
11
- "attention_bias": false,
12
- "attention_dropout": 0.0,
13
  "auto_map": {
14
- "AutoModelForCausalLM": "modular_isaac.IsaacForConditionalGeneration"
15
- },
16
- "bad_words_ids": null,
17
- "begin_suppress_tokens": null,
18
- "bos_token_id": 151643,
19
- "chunk_size_feed_forward": 0,
20
- "cross_attention_hidden_size": null,
21
- "decoder_start_token_id": null,
22
- "diversity_penalty": 0.0,
23
- "do_sample": false,
24
- "dtype": "float32",
25
- "early_stopping": false,
26
- "encoder_no_repeat_ngram_size": 0,
27
- "eos_token_id": 151645,
28
- "exponential_decay_length_penalty": null,
29
- "finetuning_task": null,
30
- "forced_bos_token_id": null,
31
- "forced_eos_token_id": null,
32
- "head_dim": 128,
33
- "hidden_act": "silu",
34
- "hidden_size": 2048,
35
- "id2label": {
36
- "0": "LABEL_0",
37
- "1": "LABEL_1"
38
- },
39
- "initializer_range": 0.02,
40
- "intermediate_size": 6144,
41
- "is_decoder": false,
42
- "is_encoder_decoder": false,
43
- "label2id": {
44
- "LABEL_0": 0,
45
- "LABEL_1": 1
46
  },
47
- "layer_types": [
48
- "full_attention",
49
- "full_attention",
50
- "full_attention",
51
- "full_attention",
52
- "full_attention",
53
- "full_attention",
54
- "full_attention",
55
- "full_attention",
56
- "full_attention",
57
- "full_attention",
58
- "full_attention",
59
- "full_attention",
60
- "full_attention",
61
- "full_attention",
62
- "full_attention",
63
- "full_attention",
64
- "full_attention",
65
- "full_attention",
66
- "full_attention",
67
- "full_attention",
68
- "full_attention",
69
- "full_attention",
70
- "full_attention",
71
- "full_attention",
72
- "full_attention",
73
- "full_attention",
74
- "full_attention",
75
- "full_attention"
76
  ],
77
- "length_penalty": 1.0,
78
- "max_length": 20,
79
- "max_position_embeddings": 40960,
80
- "max_sequence_length": 16384,
81
- "max_window_layers": 28,
82
- "min_length": 0,
83
- "model_type": "isaac",
84
- "no_repeat_ngram_size": 0,
85
- "num_attention_heads": 16,
86
- "num_beam_groups": 1,
87
- "num_beams": 1,
88
- "num_hidden_layers": 28,
89
- "num_key_value_heads": 8,
90
- "num_return_sequences": 1,
91
- "output_attentions": false,
92
- "output_hidden_states": false,
93
- "output_scores": false,
94
- "pad_token_id": null,
95
  "pixel_shuffle_scale": 2,
96
- "prefix": null,
97
- "problem_type": null,
98
- "pruned_heads": {},
99
- "remove_invalid_values": false,
100
- "repetition_penalty": 1.0,
101
- "return_dict": true,
102
- "return_dict_in_generate": false,
103
- "rms_norm_eps": 1e-06,
104
- "rope_scaling": {
105
- "mrope_interleaved": true,
106
- "mrope_section": null,
107
- "rope_type": "default"
108
- },
109
- "rope_theta": 1000000.0,
110
- "sep_token_id": null,
111
- "sliding_window": null,
112
- "suppress_tokens": null,
113
- "task_specific_params": null,
114
- "temperature": 1.0,
115
- "tf_legacy_loss": false,
116
- "tie_encoder_decoder": false,
117
- "tie_word_embeddings": false,
118
- "tokenizer_class": null,
119
- "top_k": 50,
120
- "top_p": 1.0,
121
- "torchscript": false,
122
- "transformers_version": "4.56.1",
123
- "typical_p": 1.0,
124
- "use_bfloat16": false,
125
- "use_cache": true,
126
- "use_sliding_window": false,
127
- "video_patch_size": 16,
128
- "vision_config": {
129
- "_name_or_path": "",
130
- "add_cross_attention": false,
131
- "architectures": null,
132
- "attention_dropout": 0.0,
133
- "bad_words_ids": null,
134
- "begin_suppress_tokens": null,
135
- "bos_token_id": null,
136
- "chunk_size_feed_forward": 0,
137
- "cross_attention_hidden_size": null,
138
- "decoder_start_token_id": null,
139
- "diversity_penalty": 0.0,
140
- "do_sample": false,
141
- "dtype": null,
142
- "early_stopping": false,
143
- "encoder_no_repeat_ngram_size": 0,
144
- "eos_token_id": null,
145
- "exponential_decay_length_penalty": null,
146
- "finetuning_task": null,
147
- "forced_bos_token_id": null,
148
- "forced_eos_token_id": null,
149
- "hidden_act": "gelu_pytorch_tanh",
150
- "hidden_size": 1152,
151
- "id2label": {
152
- "0": "LABEL_0",
153
- "1": "LABEL_1"
154
- },
155
- "image_size": 256,
156
- "intermediate_size": 4304,
157
- "is_decoder": false,
158
- "is_encoder_decoder": false,
159
- "label2id": {
160
- "LABEL_0": 0,
161
- "LABEL_1": 1
162
- },
163
- "layer_norm_eps": 1e-06,
164
- "length_penalty": 1.0,
165
- "max_length": 20,
166
- "min_length": 0,
167
- "model_type": "pixel_shuffle_siglip2",
168
- "no_repeat_ngram_size": 0,
169
- "num_attention_heads": 16,
170
- "num_beam_groups": 1,
171
- "num_beams": 1,
172
- "num_channels": 3,
173
- "num_hidden_layers": 27,
174
- "num_patches": 256,
175
- "num_return_sequences": 1,
176
- "output_attentions": false,
177
- "output_hidden_states": false,
178
- "output_scores": false,
179
- "pad_token_id": null,
180
- "patch_size": 16,
181
- "pixel_shuffle_scale_factor": 2,
182
- "prefix": null,
183
- "problem_type": null,
184
- "pruned_heads": {},
185
- "remove_invalid_values": false,
186
- "repetition_penalty": 1.0,
187
- "return_dict": true,
188
- "return_dict_in_generate": false,
189
- "sep_token_id": null,
190
- "suppress_tokens": null,
191
- "task_specific_params": null,
192
- "temperature": 1.0,
193
- "tf_legacy_loss": false,
194
- "tie_encoder_decoder": false,
195
- "tie_word_embeddings": true,
196
- "tokenizer_class": null,
197
- "top_k": 50,
198
- "top_p": 1.0,
199
- "torchscript": false,
200
- "typical_p": 1.0,
201
- "use_bfloat16": false
202
- },
203
- "vision_max_num_patches": 6144,
204
- "vision_min_num_patches": 256,
205
- "vision_token": "<image>",
206
- "vocab_size": 151936
207
  },
208
- "processor_class": "IsaacProcessor"
 
 
209
  }
 
2
  "auto_map": {
3
  "AutoProcessor": "modular_isaac.IsaacProcessor"
4
  },
5
+ "config": null,
6
+ "image_processor": {
7
+ "_processor_class": null,
 
 
 
 
 
8
  "auto_map": {
9
+ "AutoProcessor": "modular_isaac.IsaacProcessor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  },
11
+ "crop_size": null,
12
+ "data_format": "channels_first",
13
+ "default_to_square": null,
14
+ "device": null,
15
+ "disable_grouping": false,
16
+ "do_center_crop": false,
17
+ "do_convert_rgb": true,
18
+ "do_normalize": true,
19
+ "do_pad": false,
20
+ "do_rescale": true,
21
+ "do_resize": true,
22
+ "image_mean": [
23
+ 0.5,
24
+ 0.5,
25
+ 0.5
26
  ],
27
+ "image_processor_type": "IsaacImageProcessorFast",
28
+ "image_std": [
29
+ 0.5,
30
+ 0.5,
31
+ 0.5
32
+ ],
33
+ "input_data_format": null,
34
+ "max_num_patches": 6144,
35
+ "min_num_patches": 256,
36
+ "pad_size": null,
37
+ "patch_size": 16,
 
 
 
 
 
 
 
38
  "pixel_shuffle_scale": 2,
39
+ "resample": 2,
40
+ "rescale_factor": 0.00392156862745098,
41
+ "return_tensors": null,
42
+ "size": null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  },
44
+ "max_sequence_length": 16384,
45
+ "processor_class": "IsaacProcessor",
46
+ "vision_token": "<image>"
47
  }
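As a hedged usage sketch (not part of the commit), the flattened layout above should let the processor load directly from the repo files. The repo id below is a placeholder, and the image_processor attribute name is assumed from the usual transformers processor convention.

from transformers import AutoProcessor

# Placeholder repo id; trust_remote_code resolves modular_isaac.IsaacProcessor via the auto_map above.
processor = AutoProcessor.from_pretrained("your-org/isaac-checkpoint", trust_remote_code=True)

# Assuming IsaacProcessor exposes its image processor under the conventional attribute name:
ip = processor.image_processor
print(type(ip).__name__)                                          # expected: IsaacImageProcessorFast
print(ip.patch_size, ip.max_num_patches, ip.pixel_shuffle_scale)  # 16, 6144, 2 per the JSON above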
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
- size 11422654
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c670a45d54b226b4213f50c920332be152acff8fafaabdafd5586e772c3d500
3
+ size 11473541
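The regenerated tokenizer.json above should carry the newly added reserved special tokens; a quick hedged check (placeholder repo id, with the id taken from the tokenizer_config diff below):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/isaac-checkpoint", trust_remote_code=True)  # placeholder repo id

# <reserved_0> maps to 151669 in the added-token tables of this commit.
assert tok.convert_tokens_to_ids("<reserved_0>") == 151669
assert tok.convert_ids_to_tokens(151669) == "<reserved_0>"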
tokenizer_config.json CHANGED
@@ -209,6 +209,2142 @@
209
  "rstrip": false,
210
  "single_word": false,
211
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  }
213
  },
214
  "additional_special_tokens": [
 
209
  "rstrip": false,
210
  "single_word": false,
211
  "special": false
212
+ },
213
+ "151669": {
214
+ "content": "<reserved_0>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": true
220
+ },
221
+ "151670": {
222
+ "content": "<reserved_1>",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": true
228
+ },
229
+ "151671": {
230
+ "content": "<reserved_2>",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": true
236
+ },
237
+ "151672": {
238
+ "content": "<reserved_3>",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": true
244
+ },
245
+ "151673": {
246
+ "content": "<reserved_4>",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "151674": {
254
+ "content": "<reserved_5>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "151675": {
262
+ "content": "<reserved_6>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ },
269
+ "151676": {
270
+ "content": "<reserved_7>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": true
276
+ },
277
+ "151677": {
278
+ "content": "<reserved_8>",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": true
284
+ },
285
+ "151678": {
286
+ "content": "<reserved_9>",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": true
292
+ },
293
+ "151679": {
294
+ "content": "<reserved_10>",
295
+ "lstrip": false,
296
+ "normalized": false,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": true
300
+ },
301
+ "151680": {
302
+ "content": "<reserved_11>",
303
+ "lstrip": false,
304
+ "normalized": false,
305
+ "rstrip": false,
306
+ "single_word": false,
307
+ "special": true
308
+ },
309
+ "151681": {
310
+ "content": "<reserved_12>",
311
+ "lstrip": false,
312
+ "normalized": false,
313
+ "rstrip": false,
314
+ "single_word": false,
315
+ "special": true
316
+ },
317
+ "151682": {
318
+ "content": "<reserved_13>",
319
+ "lstrip": false,
320
+ "normalized": false,
321
+ "rstrip": false,
322
+ "single_word": false,
323
+ "special": true
324
+ },
325
+ "151683": {
326
+ "content": "<reserved_14>",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false,
331
+ "special": true
332
+ },
333
+ "151684": {
334
+ "content": "<reserved_15>",
335
+ "lstrip": false,
336
+ "normalized": false,
337
+ "rstrip": false,
338
+ "single_word": false,
339
+ "special": true
340
+ },
341
+ "151685": {
342
+ "content": "<reserved_16>",
343
+ "lstrip": false,
344
+ "normalized": false,
345
+ "rstrip": false,
346
+ "single_word": false,
347
+ "special": true
348
+ },
349
+ "151686": {
350
+ "content": "<reserved_17>",
351
+ "lstrip": false,
352
+ "normalized": false,
353
+ "rstrip": false,
354
+ "single_word": false,
355
+ "special": true
356
+ },
357
+ "151687": {
358
+ "content": "<reserved_18>",
359
+ "lstrip": false,
360
+ "normalized": false,
361
+ "rstrip": false,
362
+ "single_word": false,
363
+ "special": true
364
+ },
365
+ "151688": {
366
+ "content": "<reserved_19>",
367
+ "lstrip": false,
368
+ "normalized": false,
369
+ "rstrip": false,
370
+ "single_word": false,
371
+ "special": true
372
+ },
373
+ "151689": {
374
+ "content": "<reserved_20>",
375
+ "lstrip": false,
376
+ "normalized": false,
377
+ "rstrip": false,
378
+ "single_word": false,
379
+ "special": true
380
+ },
381
+ "151690": {
382
+ "content": "<reserved_21>",
383
+ "lstrip": false,
384
+ "normalized": false,
385
+ "rstrip": false,
386
+ "single_word": false,
387
+ "special": true
388
+ },
389
+ "151691": {
390
+ "content": "<reserved_22>",
391
+ "lstrip": false,
392
+ "normalized": false,
393
+ "rstrip": false,
394
+ "single_word": false,
395
+ "special": true
396
+ },
397
+ "151692": {
398
+ "content": "<reserved_23>",
399
+ "lstrip": false,
400
+ "normalized": false,
401
+ "rstrip": false,
402
+ "single_word": false,
403
+ "special": true
404
+ },
405
+ "151693": {
406
+ "content": "<reserved_24>",
407
+ "lstrip": false,
408
+ "normalized": false,
409
+ "rstrip": false,
410
+ "single_word": false,
411
+ "special": true
412
+ },
413
+ "151694": {
414
+ "content": "<reserved_25>",
415
+ "lstrip": false,
416
+ "normalized": false,
417
+ "rstrip": false,
418
+ "single_word": false,
419
+ "special": true
420
+ },
421
+ "151695": {
422
+ "content": "<reserved_26>",
423
+ "lstrip": false,
424
+ "normalized": false,
425
+ "rstrip": false,
426
+ "single_word": false,
427
+ "special": true
428
+ },
429
+ "151696": {
430
+ "content": "<reserved_27>",
431
+ "lstrip": false,
432
+ "normalized": false,
433
+ "rstrip": false,
434
+ "single_word": false,
435
+ "special": true
436
+ },
437
+ "151697": {
438
+ "content": "<reserved_28>",
439
+ "lstrip": false,
440
+ "normalized": false,
441
+ "rstrip": false,
442
+ "single_word": false,
443
+ "special": true
444
+ },
445
+ "151698": {
446
+ "content": "<reserved_29>",
447
+ "lstrip": false,
448
+ "normalized": false,
449
+ "rstrip": false,
450
+ "single_word": false,
451
+ "special": true
452
+ },
453
+ "151699": {
454
+ "content": "<reserved_30>",
455
+ "lstrip": false,
456
+ "normalized": false,
457
+ "rstrip": false,
458
+ "single_word": false,
459
+ "special": true
460
+ },
461
+ "151700": {
462
+ "content": "<reserved_31>",
463
+ "lstrip": false,
464
+ "normalized": false,
465
+ "rstrip": false,
466
+ "single_word": false,
467
+ "special": true
468
+ },
469
+ "151701": {
470
+ "content": "<reserved_32>",
471
+ "lstrip": false,
472
+ "normalized": false,
473
+ "rstrip": false,
474
+ "single_word": false,
475
+ "special": true
476
+ },
477
+ "151702": {
478
+ "content": "<reserved_33>",
479
+ "lstrip": false,
480
+ "normalized": false,
481
+ "rstrip": false,
482
+ "single_word": false,
483
+ "special": true
484
+ },
485
+ "151703": {
486
+ "content": "<reserved_34>",
487
+ "lstrip": false,
488
+ "normalized": false,
489
+ "rstrip": false,
490
+ "single_word": false,
491
+ "special": true
492
+ },
493
+ "151704": {
494
+ "content": "<reserved_35>",
495
+ "lstrip": false,
496
+ "normalized": false,
497
+ "rstrip": false,
498
+ "single_word": false,
499
+ "special": true
500
+ },
501
+ "151705": {
502
+ "content": "<reserved_36>",
503
+ "lstrip": false,
504
+ "normalized": false,
505
+ "rstrip": false,
506
+ "single_word": false,
507
+ "special": true
508
+ },
509
+ "151706": {
510
+ "content": "<reserved_37>",
511
+ "lstrip": false,
512
+ "normalized": false,
513
+ "rstrip": false,
514
+ "single_word": false,
515
+ "special": true
516
+ },
517
+ "151707": {
518
+ "content": "<reserved_38>",
519
+ "lstrip": false,
520
+ "normalized": false,
521
+ "rstrip": false,
522
+ "single_word": false,
523
+ "special": true
524
+ },
525
+ "151708": {
526
+ "content": "<reserved_39>",
527
+ "lstrip": false,
528
+ "normalized": false,
529
+ "rstrip": false,
530
+ "single_word": false,
531
+ "special": true
532
+ },
533
+ "151709": {
534
+ "content": "<reserved_40>",
535
+ "lstrip": false,
536
+ "normalized": false,
537
+ "rstrip": false,
538
+ "single_word": false,
539
+ "special": true
540
+ },
541
+ "151710": {
542
+ "content": "<reserved_41>",
543
+ "lstrip": false,
544
+ "normalized": false,
545
+ "rstrip": false,
546
+ "single_word": false,
547
+ "special": true
548
+ },
549
+ "151711": {
550
+ "content": "<reserved_42>",
551
+ "lstrip": false,
552
+ "normalized": false,
553
+ "rstrip": false,
554
+ "single_word": false,
555
+ "special": true
556
+ },
557
+ "151712": {
558
+ "content": "<reserved_43>",
559
+ "lstrip": false,
560
+ "normalized": false,
561
+ "rstrip": false,
562
+ "single_word": false,
563
+ "special": true
564
+ },
565
+ "151713": {
566
+ "content": "<reserved_44>",
567
+ "lstrip": false,
568
+ "normalized": false,
569
+ "rstrip": false,
570
+ "single_word": false,
571
+ "special": true
572
+ },
573
+ "151714": {
574
+ "content": "<reserved_45>",
575
+ "lstrip": false,
576
+ "normalized": false,
577
+ "rstrip": false,
578
+ "single_word": false,
579
+ "special": true
580
+ },
581
+ "151715": {
582
+ "content": "<reserved_46>",
583
+ "lstrip": false,
584
+ "normalized": false,
585
+ "rstrip": false,
586
+ "single_word": false,
587
+ "special": true
588
+ },
589
+ "151716": {
590
+ "content": "<reserved_47>",
591
+ "lstrip": false,
592
+ "normalized": false,
593
+ "rstrip": false,
594
+ "single_word": false,
595
+ "special": true
596
+ },
597
+ "151717": {
598
+ "content": "<reserved_48>",
599
+ "lstrip": false,
600
+ "normalized": false,
601
+ "rstrip": false,
602
+ "single_word": false,
603
+ "special": true
604
+ },
605
+ "151718": {
606
+ "content": "<reserved_49>",
607
+ "lstrip": false,
608
+ "normalized": false,
609
+ "rstrip": false,
610
+ "single_word": false,
611
+ "special": true
612
+ },
613
+ "151719": {
614
+ "content": "<reserved_50>",
615
+ "lstrip": false,
616
+ "normalized": false,
617
+ "rstrip": false,
618
+ "single_word": false,
619
+ "special": true
620
+ },
621
+ "151720": {
622
+ "content": "<reserved_51>",
623
+ "lstrip": false,
624
+ "normalized": false,
625
+ "rstrip": false,
626
+ "single_word": false,
627
+ "special": true
628
+ },
629
+ "151721": {
630
+ "content": "<reserved_52>",
631
+ "lstrip": false,
632
+ "normalized": false,
633
+ "rstrip": false,
634
+ "single_word": false,
635
+ "special": true
636
+ },
637
+ "151722": {
638
+ "content": "<reserved_53>",
639
+ "lstrip": false,
640
+ "normalized": false,
641
+ "rstrip": false,
642
+ "single_word": false,
643
+ "special": true
644
+ },
645
+ "151723": {
646
+ "content": "<reserved_54>",
647
+ "lstrip": false,
648
+ "normalized": false,
649
+ "rstrip": false,
650
+ "single_word": false,
651
+ "special": true
652
+ },
653
+ "151724": {
654
+ "content": "<reserved_55>",
655
+ "lstrip": false,
656
+ "normalized": false,
657
+ "rstrip": false,
658
+ "single_word": false,
659
+ "special": true
660
+ },
661
+ "151725": {
662
+ "content": "<reserved_56>",
663
+ "lstrip": false,
664
+ "normalized": false,
665
+ "rstrip": false,
666
+ "single_word": false,
667
+ "special": true
668
+ },
669
+ "151726": {
670
+ "content": "<reserved_57>",
671
+ "lstrip": false,
672
+ "normalized": false,
673
+ "rstrip": false,
674
+ "single_word": false,
675
+ "special": true
676
+ },
677
+ "151727": {
678
+ "content": "<reserved_58>",
679
+ "lstrip": false,
680
+ "normalized": false,
681
+ "rstrip": false,
682
+ "single_word": false,
683
+ "special": true
684
+ },
685
+ "151728": {
686
+ "content": "<reserved_59>",
687
+ "lstrip": false,
688
+ "normalized": false,
689
+ "rstrip": false,
690
+ "single_word": false,
691
+ "special": true
692
+ },
693
+ "151729": {
694
+ "content": "<reserved_60>",
695
+ "lstrip": false,
696
+ "normalized": false,
697
+ "rstrip": false,
698
+ "single_word": false,
699
+ "special": true
700
+ },
701
+ "151730": {
702
+ "content": "<reserved_61>",
703
+ "lstrip": false,
704
+ "normalized": false,
705
+ "rstrip": false,
706
+ "single_word": false,
707
+ "special": true
708
+ },
709
+ "151731": {
710
+ "content": "<reserved_62>",
711
+ "lstrip": false,
712
+ "normalized": false,
713
+ "rstrip": false,
714
+ "single_word": false,
715
+ "special": true
716
+ },
717
+ "151732": {
718
+ "content": "<reserved_63>",
719
+ "lstrip": false,
720
+ "normalized": false,
721
+ "rstrip": false,
722
+ "single_word": false,
723
+ "special": true
724
+ },
725
+ "151733": {
726
+ "content": "<reserved_64>",
727
+ "lstrip": false,
728
+ "normalized": false,
729
+ "rstrip": false,
730
+ "single_word": false,
731
+ "special": true
732
+ },
733
+ "151734": {
734
+ "content": "<reserved_65>",
735
+ "lstrip": false,
736
+ "normalized": false,
737
+ "rstrip": false,
738
+ "single_word": false,
739
+ "special": true
740
+ },
741
+ "151735": {
742
+ "content": "<reserved_66>",
743
+ "lstrip": false,
744
+ "normalized": false,
745
+ "rstrip": false,
746
+ "single_word": false,
747
+ "special": true
748
+ },
749
+ "151736": {
750
+ "content": "<reserved_67>",
751
+ "lstrip": false,
752
+ "normalized": false,
753
+ "rstrip": false,
754
+ "single_word": false,
755
+ "special": true
756
+ },
757
+ "151737": {
758
+ "content": "<reserved_68>",
759
+ "lstrip": false,
760
+ "normalized": false,
761
+ "rstrip": false,
762
+ "single_word": false,
763
+ "special": true
764
+ },
765
+ "151738": {
766
+ "content": "<reserved_69>",
767
+ "lstrip": false,
768
+ "normalized": false,
769
+ "rstrip": false,
770
+ "single_word": false,
771
+ "special": true
772
+ },
773
+ "151739": {
774
+ "content": "<reserved_70>",
775
+ "lstrip": false,
776
+ "normalized": false,
777
+ "rstrip": false,
778
+ "single_word": false,
779
+ "special": true
780
+ },
781
+ "151740": {
782
+ "content": "<reserved_71>",
783
+ "lstrip": false,
784
+ "normalized": false,
785
+ "rstrip": false,
786
+ "single_word": false,
787
+ "special": true
788
+ },
789
+ "151741": {
790
+ "content": "<reserved_72>",
791
+ "lstrip": false,
792
+ "normalized": false,
793
+ "rstrip": false,
794
+ "single_word": false,
795
+ "special": true
796
+ },
797
+ "151742": {
798
+ "content": "<reserved_73>",
799
+ "lstrip": false,
800
+ "normalized": false,
801
+ "rstrip": false,
802
+ "single_word": false,
803
+ "special": true
804
+ },
805
+ "151743": {
806
+ "content": "<reserved_74>",
807
+ "lstrip": false,
808
+ "normalized": false,
809
+ "rstrip": false,
810
+ "single_word": false,
811
+ "special": true
812
+ },
813
+ "151744": {
814
+ "content": "<reserved_75>",
815
+ "lstrip": false,
816
+ "normalized": false,
817
+ "rstrip": false,
818
+ "single_word": false,
819
+ "special": true
820
+ },
821
+ "151745": {
822
+ "content": "<reserved_76>",
823
+ "lstrip": false,
824
+ "normalized": false,
825
+ "rstrip": false,
826
+ "single_word": false,
827
+ "special": true
828
+ },
829
+ "151746": {
830
+ "content": "<reserved_77>",
831
+ "lstrip": false,
832
+ "normalized": false,
833
+ "rstrip": false,
834
+ "single_word": false,
835
+ "special": true
836
+ },
837
+ "151747": {
838
+ "content": "<reserved_78>",
839
+ "lstrip": false,
840
+ "normalized": false,
841
+ "rstrip": false,
842
+ "single_word": false,
843
+ "special": true
844
+ },
845
+ "151748": {
846
+ "content": "<reserved_79>",
847
+ "lstrip": false,
848
+ "normalized": false,
849
+ "rstrip": false,
850
+ "single_word": false,
851
+ "special": true
852
+ },
853
+ "151749": {
854
+ "content": "<reserved_80>",
855
+ "lstrip": false,
856
+ "normalized": false,
857
+ "rstrip": false,
858
+ "single_word": false,
859
+ "special": true
860
+ },
861
+ "151750": {
862
+ "content": "<reserved_81>",
863
+ "lstrip": false,
864
+ "normalized": false,
865
+ "rstrip": false,
866
+ "single_word": false,
867
+ "special": true
868
+ },
869
+ "151751": {
870
+ "content": "<reserved_82>",
871
+ "lstrip": false,
872
+ "normalized": false,
873
+ "rstrip": false,
874
+ "single_word": false,
875
+ "special": true
876
+ },
877
+ "151752": {
878
+ "content": "<reserved_83>",
879
+ "lstrip": false,
880
+ "normalized": false,
881
+ "rstrip": false,
882
+ "single_word": false,
883
+ "special": true
884
+ },
885
+ "151753": {
886
+ "content": "<reserved_84>",
887
+ "lstrip": false,
888
+ "normalized": false,
889
+ "rstrip": false,
890
+ "single_word": false,
891
+ "special": true
892
+ },
893
+ "151754": {
894
+ "content": "<reserved_85>",
895
+ "lstrip": false,
896
+ "normalized": false,
897
+ "rstrip": false,
898
+ "single_word": false,
899
+ "special": true
900
+ },
901
+ "151755": {
902
+ "content": "<reserved_86>",
903
+ "lstrip": false,
904
+ "normalized": false,
905
+ "rstrip": false,
906
+ "single_word": false,
907
+ "special": true
908
+ },
909
+ "151756": {
910
+ "content": "<reserved_87>",
911
+ "lstrip": false,
912
+ "normalized": false,
913
+ "rstrip": false,
914
+ "single_word": false,
915
+ "special": true
916
+ },
917
+ "151757": {
918
+ "content": "<reserved_88>",
919
+ "lstrip": false,
920
+ "normalized": false,
921
+ "rstrip": false,
922
+ "single_word": false,
923
+ "special": true
924
+ },
925
+ "151758": {
926
+ "content": "<reserved_89>",
927
+ "lstrip": false,
928
+ "normalized": false,
929
+ "rstrip": false,
930
+ "single_word": false,
931
+ "special": true
932
+ },
933
+ "151759": {
934
+ "content": "<reserved_90>",
935
+ "lstrip": false,
936
+ "normalized": false,
937
+ "rstrip": false,
938
+ "single_word": false,
939
+ "special": true
940
+ },
941
+ "151760": {
942
+ "content": "<reserved_91>",
943
+ "lstrip": false,
944
+ "normalized": false,
945
+ "rstrip": false,
946
+ "single_word": false,
947
+ "special": true
948
+ },
949
+ "151761": {
950
+ "content": "<reserved_92>",
951
+ "lstrip": false,
952
+ "normalized": false,
953
+ "rstrip": false,
954
+ "single_word": false,
955
+ "special": true
956
+ },
957
+ "151762": {
958
+ "content": "<reserved_93>",
959
+ "lstrip": false,
960
+ "normalized": false,
961
+ "rstrip": false,
962
+ "single_word": false,
963
+ "special": true
964
+ },
965
+ "151763": {
966
+ "content": "<reserved_94>",
967
+ "lstrip": false,
968
+ "normalized": false,
969
+ "rstrip": false,
970
+ "single_word": false,
971
+ "special": true
972
+ },
973
+ "151764": {
974
+ "content": "<reserved_95>",
975
+ "lstrip": false,
976
+ "normalized": false,
977
+ "rstrip": false,
978
+ "single_word": false,
979
+ "special": true
980
+ },
981
+ "151765": {
982
+ "content": "<reserved_96>",
983
+ "lstrip": false,
984
+ "normalized": false,
985
+ "rstrip": false,
986
+ "single_word": false,
987
+ "special": true
988
+ },
989
+ "151766": {
990
+ "content": "<reserved_97>",
991
+ "lstrip": false,
992
+ "normalized": false,
993
+ "rstrip": false,
994
+ "single_word": false,
995
+ "special": true
996
+ },
997
+ "151767": {
998
+ "content": "<reserved_98>",
999
+ "lstrip": false,
1000
+ "normalized": false,
1001
+ "rstrip": false,
1002
+ "single_word": false,
1003
+ "special": true
1004
+ },
1005
+ "151768": {
1006
+ "content": "<reserved_99>",
1007
+ "lstrip": false,
1008
+ "normalized": false,
1009
+ "rstrip": false,
1010
+ "single_word": false,
1011
+ "special": true
1012
+ },
1013
+ "151769": {
1014
+ "content": "<reserved_100>",
1015
+ "lstrip": false,
1016
+ "normalized": false,
1017
+ "rstrip": false,
1018
+ "single_word": false,
1019
+ "special": true
1020
+ },
1021
+ "151770": {
1022
+ "content": "<reserved_101>",
1023
+ "lstrip": false,
1024
+ "normalized": false,
1025
+ "rstrip": false,
1026
+ "single_word": false,
1027
+ "special": true
1028
+ },
1029
+ "151771": {
1030
+ "content": "<reserved_102>",
1031
+ "lstrip": false,
1032
+ "normalized": false,
1033
+ "rstrip": false,
1034
+ "single_word": false,
1035
+ "special": true
1036
+ },
1037
+ "151772": {
1038
+ "content": "<reserved_103>",
1039
+ "lstrip": false,
1040
+ "normalized": false,
1041
+ "rstrip": false,
1042
+ "single_word": false,
1043
+ "special": true
1044
+ },
1045
+ "151773": {
1046
+ "content": "<reserved_104>",
1047
+ "lstrip": false,
1048
+ "normalized": false,
1049
+ "rstrip": false,
1050
+ "single_word": false,
1051
+ "special": true
1052
+ },
1053
+ "151774": {
1054
+ "content": "<reserved_105>",
1055
+ "lstrip": false,
1056
+ "normalized": false,
1057
+ "rstrip": false,
1058
+ "single_word": false,
1059
+ "special": true
1060
+ },
1061
+ "151775": {
1062
+ "content": "<reserved_106>",
1063
+ "lstrip": false,
1064
+ "normalized": false,
1065
+ "rstrip": false,
1066
+ "single_word": false,
1067
+ "special": true
1068
+ },
1069
+ "151776": {
1070
+ "content": "<reserved_107>",
1071
+ "lstrip": false,
1072
+ "normalized": false,
1073
+ "rstrip": false,
1074
+ "single_word": false,
1075
+ "special": true
1076
+ },
1077
+ "151777": {
1078
+ "content": "<reserved_108>",
1079
+ "lstrip": false,
1080
+ "normalized": false,
1081
+ "rstrip": false,
1082
+ "single_word": false,
1083
+ "special": true
1084
+ },
1085
+ "151778": {
1086
+ "content": "<reserved_109>",
1087
+ "lstrip": false,
1088
+ "normalized": false,
1089
+ "rstrip": false,
1090
+ "single_word": false,
1091
+ "special": true
1092
+ },
1093
+ "151779": {
1094
+ "content": "<reserved_110>",
1095
+ "lstrip": false,
1096
+ "normalized": false,
1097
+ "rstrip": false,
1098
+ "single_word": false,
1099
+ "special": true
1100
+ },
1101
+ "151780": {
1102
+ "content": "<reserved_111>",
1103
+ "lstrip": false,
1104
+ "normalized": false,
1105
+ "rstrip": false,
1106
+ "single_word": false,
1107
+ "special": true
1108
+ },
1109
+ "151781": {
1110
+ "content": "<reserved_112>",
1111
+ "lstrip": false,
1112
+ "normalized": false,
1113
+ "rstrip": false,
1114
+ "single_word": false,
1115
+ "special": true
1116
+ },
1117
+ "151782": {
1118
+ "content": "<reserved_113>",
1119
+ "lstrip": false,
1120
+ "normalized": false,
1121
+ "rstrip": false,
1122
+ "single_word": false,
1123
+ "special": true
1124
+ },
1125
+ "151783": {
1126
+ "content": "<reserved_114>",
1127
+ "lstrip": false,
1128
+ "normalized": false,
1129
+ "rstrip": false,
1130
+ "single_word": false,
1131
+ "special": true
1132
+ },
1133
+ "151784": {
1134
+ "content": "<reserved_115>",
1135
+ "lstrip": false,
1136
+ "normalized": false,
1137
+ "rstrip": false,
1138
+ "single_word": false,
1139
+ "special": true
1140
+ },
1141
+ "151785": {
1142
+ "content": "<reserved_116>",
1143
+ "lstrip": false,
1144
+ "normalized": false,
1145
+ "rstrip": false,
1146
+ "single_word": false,
1147
+ "special": true
1148
+ },
1149
+ "151786": {
1150
+ "content": "<reserved_117>",
1151
+ "lstrip": false,
1152
+ "normalized": false,
1153
+ "rstrip": false,
1154
+ "single_word": false,
1155
+ "special": true
1156
+ },
1157
+ "151787": {
1158
+ "content": "<reserved_118>",
1159
+ "lstrip": false,
1160
+ "normalized": false,
1161
+ "rstrip": false,
1162
+ "single_word": false,
1163
+ "special": true
1164
+ },
1165
+ "151788": {
1166
+ "content": "<reserved_119>",
1167
+ "lstrip": false,
1168
+ "normalized": false,
1169
+ "rstrip": false,
1170
+ "single_word": false,
1171
+ "special": true
1172
+ },
1173
+ "151789": {
1174
+ "content": "<reserved_120>",
1175
+ "lstrip": false,
1176
+ "normalized": false,
1177
+ "rstrip": false,
1178
+ "single_word": false,
1179
+ "special": true
1180
+ },
1181
+ "151790": {
1182
+ "content": "<reserved_121>",
1183
+ "lstrip": false,
1184
+ "normalized": false,
1185
+ "rstrip": false,
1186
+ "single_word": false,
1187
+ "special": true
1188
+ },
1189
+ "151791": {
1190
+ "content": "<reserved_122>",
1191
+ "lstrip": false,
1192
+ "normalized": false,
1193
+ "rstrip": false,
1194
+ "single_word": false,
1195
+ "special": true
1196
+ },
1197
+ "151792": {
1198
+ "content": "<reserved_123>",
1199
+ "lstrip": false,
1200
+ "normalized": false,
1201
+ "rstrip": false,
1202
+ "single_word": false,
1203
+ "special": true
1204
+ },
1205
+ "151793": {
1206
+ "content": "<reserved_124>",
1207
+ "lstrip": false,
1208
+ "normalized": false,
1209
+ "rstrip": false,
1210
+ "single_word": false,
1211
+ "special": true
1212
+ },
1213
+ "151794": {
1214
+ "content": "<reserved_125>",
1215
+ "lstrip": false,
1216
+ "normalized": false,
1217
+ "rstrip": false,
1218
+ "single_word": false,
1219
+ "special": true
1220
+ },
1221
+ "151795": {
1222
+ "content": "<reserved_126>",
1223
+ "lstrip": false,
1224
+ "normalized": false,
1225
+ "rstrip": false,
1226
+ "single_word": false,
1227
+ "special": true
1228
+ },
1229
+ "151796": {
1230
+ "content": "<reserved_127>",
1231
+ "lstrip": false,
1232
+ "normalized": false,
1233
+ "rstrip": false,
1234
+ "single_word": false,
1235
+ "special": true
1236
+ },
1237
+ "151797": {
1238
+ "content": "<reserved_128>",
1239
+ "lstrip": false,
1240
+ "normalized": false,
1241
+ "rstrip": false,
1242
+ "single_word": false,
1243
+ "special": true
1244
+ },
1245
+ "151798": {
1246
+ "content": "<reserved_129>",
1247
+ "lstrip": false,
1248
+ "normalized": false,
1249
+ "rstrip": false,
1250
+ "single_word": false,
1251
+ "special": true
1252
+ },
1253
+ "151799": {
1254
+ "content": "<reserved_130>",
1255
+ "lstrip": false,
1256
+ "normalized": false,
1257
+ "rstrip": false,
1258
+ "single_word": false,
1259
+ "special": true
1260
+ },
1261
+ "151800": {
1262
+ "content": "<reserved_131>",
1263
+ "lstrip": false,
1264
+ "normalized": false,
1265
+ "rstrip": false,
1266
+ "single_word": false,
1267
+ "special": true
1268
+ },
1269
+ "151801": {
1270
+ "content": "<reserved_132>",
1271
+ "lstrip": false,
1272
+ "normalized": false,
1273
+ "rstrip": false,
1274
+ "single_word": false,
1275
+ "special": true
1276
+ },
1277
+ "151802": {
1278
+ "content": "<reserved_133>",
1279
+ "lstrip": false,
1280
+ "normalized": false,
1281
+ "rstrip": false,
1282
+ "single_word": false,
1283
+ "special": true
1284
+ },
1285
+ "151803": {
1286
+ "content": "<reserved_134>",
1287
+ "lstrip": false,
1288
+ "normalized": false,
1289
+ "rstrip": false,
1290
+ "single_word": false,
1291
+ "special": true
1292
+ },
1293
+ "151804": {
1294
+ "content": "<reserved_135>",
1295
+ "lstrip": false,
1296
+ "normalized": false,
1297
+ "rstrip": false,
1298
+ "single_word": false,
1299
+ "special": true
1300
+ },
1301
+ "151805": {
1302
+ "content": "<reserved_136>",
1303
+ "lstrip": false,
1304
+ "normalized": false,
1305
+ "rstrip": false,
1306
+ "single_word": false,
1307
+ "special": true
1308
+ },
1309
+ "151806": {
1310
+ "content": "<reserved_137>",
1311
+ "lstrip": false,
1312
+ "normalized": false,
1313
+ "rstrip": false,
1314
+ "single_word": false,
1315
+ "special": true
1316
+ },
1317
+ "151807": {
1318
+ "content": "<reserved_138>",
1319
+ "lstrip": false,
1320
+ "normalized": false,
1321
+ "rstrip": false,
1322
+ "single_word": false,
1323
+ "special": true
1324
+ },
1325
+ "151808": {
1326
+ "content": "<reserved_139>",
1327
+ "lstrip": false,
1328
+ "normalized": false,
1329
+ "rstrip": false,
1330
+ "single_word": false,
1331
+ "special": true
1332
+ },
1333
+ "151809": {
1334
+ "content": "<reserved_140>",
1335
+ "lstrip": false,
1336
+ "normalized": false,
1337
+ "rstrip": false,
1338
+ "single_word": false,
1339
+ "special": true
1340
+ },
1341
+ "151810": {
1342
+ "content": "<reserved_141>",
1343
+ "lstrip": false,
1344
+ "normalized": false,
1345
+ "rstrip": false,
1346
+ "single_word": false,
1347
+ "special": true
1348
+ },
1349
+ "151811": {
1350
+ "content": "<reserved_142>",
1351
+ "lstrip": false,
1352
+ "normalized": false,
1353
+ "rstrip": false,
1354
+ "single_word": false,
1355
+ "special": true
1356
+ },
1357
+ "151812": {
1358
+ "content": "<reserved_143>",
1359
+ "lstrip": false,
1360
+ "normalized": false,
1361
+ "rstrip": false,
1362
+ "single_word": false,
1363
+ "special": true
1364
+ },
1365
+ "151813": {
1366
+ "content": "<reserved_144>",
1367
+ "lstrip": false,
1368
+ "normalized": false,
1369
+ "rstrip": false,
1370
+ "single_word": false,
1371
+ "special": true
1372
+ },
1373
+ "151814": {
1374
+ "content": "<reserved_145>",
1375
+ "lstrip": false,
1376
+ "normalized": false,
1377
+ "rstrip": false,
1378
+ "single_word": false,
1379
+ "special": true
1380
+ },
1381
+ "151815": {
1382
+ "content": "<reserved_146>",
1383
+ "lstrip": false,
1384
+ "normalized": false,
1385
+ "rstrip": false,
1386
+ "single_word": false,
1387
+ "special": true
1388
+ },
1389
+ "151816": {
1390
+ "content": "<reserved_147>",
1391
+ "lstrip": false,
1392
+ "normalized": false,
1393
+ "rstrip": false,
1394
+ "single_word": false,
1395
+ "special": true
1396
+ },
1397
+ "151817": {
1398
+ "content": "<reserved_148>",
1399
+ "lstrip": false,
1400
+ "normalized": false,
1401
+ "rstrip": false,
1402
+ "single_word": false,
1403
+ "special": true
1404
+ },
1405
+ "151818": {
1406
+ "content": "<reserved_149>",
1407
+ "lstrip": false,
1408
+ "normalized": false,
1409
+ "rstrip": false,
1410
+ "single_word": false,
1411
+ "special": true
1412
+ },
1413
+ "151819": {
1414
+ "content": "<reserved_150>",
1415
+ "lstrip": false,
1416
+ "normalized": false,
1417
+ "rstrip": false,
1418
+ "single_word": false,
1419
+ "special": true
1420
+ },
1421
+ "151820": {
1422
+ "content": "<reserved_151>",
1423
+ "lstrip": false,
1424
+ "normalized": false,
1425
+ "rstrip": false,
1426
+ "single_word": false,
1427
+ "special": true
1428
+ },
1429
+ "151821": {
1430
+ "content": "<reserved_152>",
1431
+ "lstrip": false,
1432
+ "normalized": false,
1433
+ "rstrip": false,
1434
+ "single_word": false,
1435
+ "special": true
1436
+ },
1437
+ "151822": {
1438
+ "content": "<reserved_153>",
1439
+ "lstrip": false,
1440
+ "normalized": false,
1441
+ "rstrip": false,
1442
+ "single_word": false,
1443
+ "special": true
1444
+ },
1445
+ "151823": {
1446
+ "content": "<reserved_154>",
1447
+ "lstrip": false,
1448
+ "normalized": false,
1449
+ "rstrip": false,
1450
+ "single_word": false,
1451
+ "special": true
1452
+ },
1453
+ "151824": {
1454
+ "content": "<reserved_155>",
1455
+ "lstrip": false,
1456
+ "normalized": false,
1457
+ "rstrip": false,
1458
+ "single_word": false,
1459
+ "special": true
1460
+ },
1461
+ "151825": {
1462
+ "content": "<reserved_156>",
1463
+ "lstrip": false,
1464
+ "normalized": false,
1465
+ "rstrip": false,
1466
+ "single_word": false,
1467
+ "special": true
1468
+ },
1469
+ "151826": {
1470
+ "content": "<reserved_157>",
1471
+ "lstrip": false,
1472
+ "normalized": false,
1473
+ "rstrip": false,
1474
+ "single_word": false,
1475
+ "special": true
1476
+ },
1477
+ "151827": {
1478
+ "content": "<reserved_158>",
1479
+ "lstrip": false,
1480
+ "normalized": false,
1481
+ "rstrip": false,
1482
+ "single_word": false,
1483
+ "special": true
1484
+ },
1485
+ "151828": {
1486
+ "content": "<reserved_159>",
1487
+ "lstrip": false,
1488
+ "normalized": false,
1489
+ "rstrip": false,
1490
+ "single_word": false,
1491
+ "special": true
1492
+ },
1493
+ "151829": {
1494
+ "content": "<reserved_160>",
1495
+ "lstrip": false,
1496
+ "normalized": false,
1497
+ "rstrip": false,
1498
+ "single_word": false,
1499
+ "special": true
1500
+ },
1501
+ "151830": {
1502
+ "content": "<reserved_161>",
1503
+ "lstrip": false,
1504
+ "normalized": false,
1505
+ "rstrip": false,
1506
+ "single_word": false,
1507
+ "special": true
1508
+ },
1509
+ "151831": {
1510
+ "content": "<reserved_162>",
1511
+ "lstrip": false,
1512
+ "normalized": false,
1513
+ "rstrip": false,
1514
+ "single_word": false,
1515
+ "special": true
1516
+ },
1517
+ "151832": {
1518
+ "content": "<reserved_163>",
1519
+ "lstrip": false,
1520
+ "normalized": false,
1521
+ "rstrip": false,
1522
+ "single_word": false,
1523
+ "special": true
1524
+ },
1525
+ "151833": {
1526
+ "content": "<reserved_164>",
1527
+ "lstrip": false,
1528
+ "normalized": false,
1529
+ "rstrip": false,
1530
+ "single_word": false,
1531
+ "special": true
1532
+ },
1533
+ "151834": {
1534
+ "content": "<reserved_165>",
1535
+ "lstrip": false,
1536
+ "normalized": false,
1537
+ "rstrip": false,
1538
+ "single_word": false,
1539
+ "special": true
1540
+ },
1541
+ "151835": {
1542
+ "content": "<reserved_166>",
1543
+ "lstrip": false,
1544
+ "normalized": false,
1545
+ "rstrip": false,
1546
+ "single_word": false,
1547
+ "special": true
1548
+ },
1549
+ "151836": {
1550
+ "content": "<reserved_167>",
1551
+ "lstrip": false,
1552
+ "normalized": false,
1553
+ "rstrip": false,
1554
+ "single_word": false,
1555
+ "special": true
1556
+ },
1557
+ "151837": {
1558
+ "content": "<reserved_168>",
1559
+ "lstrip": false,
1560
+ "normalized": false,
1561
+ "rstrip": false,
1562
+ "single_word": false,
1563
+ "special": true
1564
+ },
1565
+ "151838": {
1566
+ "content": "<reserved_169>",
1567
+ "lstrip": false,
1568
+ "normalized": false,
1569
+ "rstrip": false,
1570
+ "single_word": false,
1571
+ "special": true
1572
+ },
1573
+ "151839": {
1574
+ "content": "<reserved_170>",
1575
+ "lstrip": false,
1576
+ "normalized": false,
1577
+ "rstrip": false,
1578
+ "single_word": false,
1579
+ "special": true
1580
+ },
1581
+ "151840": {
1582
+ "content": "<reserved_171>",
1583
+ "lstrip": false,
1584
+ "normalized": false,
1585
+ "rstrip": false,
1586
+ "single_word": false,
1587
+ "special": true
1588
+ },
1589
+ "151841": {
1590
+ "content": "<reserved_172>",
1591
+ "lstrip": false,
1592
+ "normalized": false,
1593
+ "rstrip": false,
1594
+ "single_word": false,
1595
+ "special": true
1596
+ },
1597
+ "151842": {
1598
+ "content": "<reserved_173>",
1599
+ "lstrip": false,
1600
+ "normalized": false,
1601
+ "rstrip": false,
1602
+ "single_word": false,
1603
+ "special": true
1604
+ },
1605
+ "151843": {
1606
+ "content": "<reserved_174>",
1607
+ "lstrip": false,
1608
+ "normalized": false,
1609
+ "rstrip": false,
1610
+ "single_word": false,
1611
+ "special": true
1612
+ },
1613
+ "151844": {
1614
+ "content": "<reserved_175>",
1615
+ "lstrip": false,
1616
+ "normalized": false,
1617
+ "rstrip": false,
1618
+ "single_word": false,
1619
+ "special": true
1620
+ },
1621
+ "151845": {
1622
+ "content": "<reserved_176>",
1623
+ "lstrip": false,
1624
+ "normalized": false,
1625
+ "rstrip": false,
1626
+ "single_word": false,
1627
+ "special": true
1628
+ },
1629
+ "151846": {
1630
+ "content": "<reserved_177>",
1631
+ "lstrip": false,
1632
+ "normalized": false,
1633
+ "rstrip": false,
1634
+ "single_word": false,
1635
+ "special": true
1636
+ },
1637
+ "151847": {
1638
+ "content": "<reserved_178>",
1639
+ "lstrip": false,
1640
+ "normalized": false,
1641
+ "rstrip": false,
1642
+ "single_word": false,
1643
+ "special": true
1644
+ },
1645
+ "151848": {
1646
+ "content": "<reserved_179>",
1647
+ "lstrip": false,
1648
+ "normalized": false,
1649
+ "rstrip": false,
1650
+ "single_word": false,
1651
+ "special": true
1652
+ },
1653
+ "151849": {
1654
+ "content": "<reserved_180>",
1655
+ "lstrip": false,
1656
+ "normalized": false,
1657
+ "rstrip": false,
1658
+ "single_word": false,
1659
+ "special": true
1660
+ },
1661
+ "151850": {
1662
+ "content": "<reserved_181>",
1663
+ "lstrip": false,
1664
+ "normalized": false,
1665
+ "rstrip": false,
1666
+ "single_word": false,
1667
+ "special": true
1668
+ },
1669
+ "151851": {
1670
+ "content": "<reserved_182>",
1671
+ "lstrip": false,
1672
+ "normalized": false,
1673
+ "rstrip": false,
1674
+ "single_word": false,
1675
+ "special": true
1676
+ },
1677
+ "151852": {
1678
+ "content": "<reserved_183>",
1679
+ "lstrip": false,
1680
+ "normalized": false,
1681
+ "rstrip": false,
1682
+ "single_word": false,
1683
+ "special": true
1684
+ },
1685
+ "151853": {
1686
+ "content": "<reserved_184>",
1687
+ "lstrip": false,
1688
+ "normalized": false,
1689
+ "rstrip": false,
1690
+ "single_word": false,
1691
+ "special": true
1692
+ },
1693
+ "151854": {
1694
+ "content": "<reserved_185>",
1695
+ "lstrip": false,
1696
+ "normalized": false,
1697
+ "rstrip": false,
1698
+ "single_word": false,
1699
+ "special": true
1700
+ },
1701
+ "151855": {
1702
+ "content": "<reserved_186>",
1703
+ "lstrip": false,
1704
+ "normalized": false,
1705
+ "rstrip": false,
1706
+ "single_word": false,
1707
+ "special": true
1708
+ },
1709
+ "151856": {
1710
+ "content": "<reserved_187>",
1711
+ "lstrip": false,
1712
+ "normalized": false,
1713
+ "rstrip": false,
1714
+ "single_word": false,
1715
+ "special": true
1716
+ },
1717
+ "151857": {
1718
+ "content": "<reserved_188>",
1719
+ "lstrip": false,
1720
+ "normalized": false,
1721
+ "rstrip": false,
1722
+ "single_word": false,
1723
+ "special": true
1724
+ },
1725
+ "151858": {
1726
+ "content": "<reserved_189>",
1727
+ "lstrip": false,
1728
+ "normalized": false,
1729
+ "rstrip": false,
1730
+ "single_word": false,
1731
+ "special": true
1732
+ },
1733
+ "151859": {
1734
+ "content": "<reserved_190>",
1735
+ "lstrip": false,
1736
+ "normalized": false,
1737
+ "rstrip": false,
1738
+ "single_word": false,
1739
+ "special": true
1740
+ },
1741
+ "151860": {
1742
+ "content": "<reserved_191>",
1743
+ "lstrip": false,
1744
+ "normalized": false,
1745
+ "rstrip": false,
1746
+ "single_word": false,
1747
+ "special": true
1748
+ },
1749
+ "151861": {
1750
+ "content": "<reserved_192>",
1751
+ "lstrip": false,
1752
+ "normalized": false,
1753
+ "rstrip": false,
1754
+ "single_word": false,
1755
+ "special": true
1756
+ },
1757
+ "151862": {
1758
+ "content": "<reserved_193>",
1759
+ "lstrip": false,
1760
+ "normalized": false,
1761
+ "rstrip": false,
1762
+ "single_word": false,
1763
+ "special": true
1764
+ },
1765
+ "151863": {
1766
+ "content": "<reserved_194>",
1767
+ "lstrip": false,
1768
+ "normalized": false,
1769
+ "rstrip": false,
1770
+ "single_word": false,
1771
+ "special": true
1772
+ },
1773
+ "151864": {
1774
+ "content": "<reserved_195>",
1775
+ "lstrip": false,
1776
+ "normalized": false,
1777
+ "rstrip": false,
1778
+ "single_word": false,
1779
+ "special": true
1780
+ },
1781
+ "151865": {
1782
+ "content": "<reserved_196>",
1783
+ "lstrip": false,
1784
+ "normalized": false,
1785
+ "rstrip": false,
1786
+ "single_word": false,
1787
+ "special": true
1788
+ },
1789
+ "151866": {
1790
+ "content": "<reserved_197>",
1791
+ "lstrip": false,
1792
+ "normalized": false,
1793
+ "rstrip": false,
1794
+ "single_word": false,
1795
+ "special": true
1796
+ },
1797
+ "151867": {
1798
+ "content": "<reserved_198>",
1799
+ "lstrip": false,
1800
+ "normalized": false,
1801
+ "rstrip": false,
1802
+ "single_word": false,
1803
+ "special": true
1804
+ },
1805
+ "151868": {
1806
+ "content": "<reserved_199>",
1807
+ "lstrip": false,
1808
+ "normalized": false,
1809
+ "rstrip": false,
1810
+ "single_word": false,
1811
+ "special": true
1812
+ },
1813
+ "151869": {
1814
+ "content": "<reserved_200>",
1815
+ "lstrip": false,
1816
+ "normalized": false,
1817
+ "rstrip": false,
1818
+ "single_word": false,
1819
+ "special": true
1820
+ },
1821
+ "151870": {
1822
+ "content": "<reserved_201>",
1823
+ "lstrip": false,
1824
+ "normalized": false,
1825
+ "rstrip": false,
1826
+ "single_word": false,
1827
+ "special": true
1828
+ },
1829
+ "151871": {
1830
+ "content": "<reserved_202>",
1831
+ "lstrip": false,
1832
+ "normalized": false,
1833
+ "rstrip": false,
1834
+ "single_word": false,
1835
+ "special": true
1836
+ },
1837
+ "151872": {
1838
+ "content": "<reserved_203>",
1839
+ "lstrip": false,
1840
+ "normalized": false,
1841
+ "rstrip": false,
1842
+ "single_word": false,
1843
+ "special": true
1844
+ },
1845
+ "151873": {
1846
+ "content": "<reserved_204>",
1847
+ "lstrip": false,
1848
+ "normalized": false,
1849
+ "rstrip": false,
1850
+ "single_word": false,
1851
+ "special": true
1852
+ },
1853
+ "151874": {
1854
+ "content": "<reserved_205>",
1855
+ "lstrip": false,
1856
+ "normalized": false,
1857
+ "rstrip": false,
1858
+ "single_word": false,
1859
+ "special": true
1860
+ },
1861
+ "151875": {
1862
+ "content": "<reserved_206>",
1863
+ "lstrip": false,
1864
+ "normalized": false,
1865
+ "rstrip": false,
1866
+ "single_word": false,
1867
+ "special": true
1868
+ },
1869
+ "151876": {
1870
+ "content": "<reserved_207>",
1871
+ "lstrip": false,
1872
+ "normalized": false,
1873
+ "rstrip": false,
1874
+ "single_word": false,
1875
+ "special": true
1876
+ },
1877
+ "151877": {
1878
+ "content": "<reserved_208>",
1879
+ "lstrip": false,
1880
+ "normalized": false,
1881
+ "rstrip": false,
1882
+ "single_word": false,
1883
+ "special": true
1884
+ },
1885
+ "151878": {
1886
+ "content": "<reserved_209>",
1887
+ "lstrip": false,
1888
+ "normalized": false,
1889
+ "rstrip": false,
1890
+ "single_word": false,
1891
+ "special": true
1892
+ },
1893
+ "151879": {
1894
+ "content": "<reserved_210>",
1895
+ "lstrip": false,
1896
+ "normalized": false,
1897
+ "rstrip": false,
1898
+ "single_word": false,
1899
+ "special": true
1900
+ },
1901
+ "151880": {
1902
+ "content": "<reserved_211>",
1903
+ "lstrip": false,
1904
+ "normalized": false,
1905
+ "rstrip": false,
1906
+ "single_word": false,
1907
+ "special": true
1908
+ },
1909
+ "151881": {
1910
+ "content": "<reserved_212>",
1911
+ "lstrip": false,
1912
+ "normalized": false,
1913
+ "rstrip": false,
1914
+ "single_word": false,
1915
+ "special": true
1916
+ },
1917
+ "151882": {
1918
+ "content": "<reserved_213>",
1919
+ "lstrip": false,
1920
+ "normalized": false,
1921
+ "rstrip": false,
1922
+ "single_word": false,
1923
+ "special": true
1924
+ },
1925
+ "151883": {
1926
+ "content": "<reserved_214>",
1927
+ "lstrip": false,
1928
+ "normalized": false,
1929
+ "rstrip": false,
1930
+ "single_word": false,
1931
+ "special": true
1932
+ },
1933
+ "151884": {
1934
+ "content": "<reserved_215>",
1935
+ "lstrip": false,
1936
+ "normalized": false,
1937
+ "rstrip": false,
1938
+ "single_word": false,
1939
+ "special": true
1940
+ },
1941
+ "151885": {
1942
+ "content": "<reserved_216>",
1943
+ "lstrip": false,
1944
+ "normalized": false,
1945
+ "rstrip": false,
1946
+ "single_word": false,
1947
+ "special": true
1948
+ },
1949
+ "151886": {
1950
+ "content": "<reserved_217>",
1951
+ "lstrip": false,
1952
+ "normalized": false,
1953
+ "rstrip": false,
1954
+ "single_word": false,
1955
+ "special": true
1956
+ },
1957
+ "151887": {
1958
+ "content": "<reserved_218>",
1959
+ "lstrip": false,
1960
+ "normalized": false,
1961
+ "rstrip": false,
1962
+ "single_word": false,
1963
+ "special": true
1964
+ },
1965
+ "151888": {
1966
+ "content": "<reserved_219>",
1967
+ "lstrip": false,
1968
+ "normalized": false,
1969
+ "rstrip": false,
1970
+ "single_word": false,
1971
+ "special": true
1972
+ },
1973
+ "151889": {
1974
+ "content": "<reserved_220>",
1975
+ "lstrip": false,
1976
+ "normalized": false,
1977
+ "rstrip": false,
1978
+ "single_word": false,
1979
+ "special": true
1980
+ },
1981
+ "151890": {
1982
+ "content": "<reserved_221>",
1983
+ "lstrip": false,
1984
+ "normalized": false,
1985
+ "rstrip": false,
1986
+ "single_word": false,
1987
+ "special": true
1988
+ },
1989
+ "151891": {
1990
+ "content": "<reserved_222>",
1991
+ "lstrip": false,
1992
+ "normalized": false,
1993
+ "rstrip": false,
1994
+ "single_word": false,
1995
+ "special": true
1996
+ },
1997
+ "151892": {
1998
+ "content": "<reserved_223>",
1999
+ "lstrip": false,
2000
+ "normalized": false,
2001
+ "rstrip": false,
2002
+ "single_word": false,
2003
+ "special": true
2004
+ },
2005
+ "151893": {
2006
+ "content": "<reserved_224>",
2007
+ "lstrip": false,
2008
+ "normalized": false,
2009
+ "rstrip": false,
2010
+ "single_word": false,
2011
+ "special": true
2012
+ },
2013
+ "151894": {
2014
+ "content": "<reserved_225>",
2015
+ "lstrip": false,
2016
+ "normalized": false,
2017
+ "rstrip": false,
2018
+ "single_word": false,
2019
+ "special": true
2020
+ },
2021
+ "151895": {
2022
+ "content": "<reserved_226>",
2023
+ "lstrip": false,
2024
+ "normalized": false,
2025
+ "rstrip": false,
2026
+ "single_word": false,
2027
+ "special": true
2028
+ },
2029
+ "151896": {
2030
+ "content": "<reserved_227>",
2031
+ "lstrip": false,
2032
+ "normalized": false,
2033
+ "rstrip": false,
2034
+ "single_word": false,
2035
+ "special": true
2036
+ },
2037
+ "151897": {
2038
+ "content": "<reserved_228>",
2039
+ "lstrip": false,
2040
+ "normalized": false,
2041
+ "rstrip": false,
2042
+ "single_word": false,
2043
+ "special": true
2044
+ },
2045
+ "151898": {
2046
+ "content": "<reserved_229>",
2047
+ "lstrip": false,
2048
+ "normalized": false,
2049
+ "rstrip": false,
2050
+ "single_word": false,
2051
+ "special": true
2052
+ },
2053
+ "151899": {
2054
+ "content": "<reserved_230>",
2055
+ "lstrip": false,
2056
+ "normalized": false,
2057
+ "rstrip": false,
2058
+ "single_word": false,
2059
+ "special": true
2060
+ },
2061
+ "151900": {
2062
+ "content": "<reserved_231>",
2063
+ "lstrip": false,
2064
+ "normalized": false,
2065
+ "rstrip": false,
2066
+ "single_word": false,
2067
+ "special": true
2068
+ },
2069
+ "151901": {
2070
+ "content": "<reserved_232>",
2071
+ "lstrip": false,
2072
+ "normalized": false,
2073
+ "rstrip": false,
2074
+ "single_word": false,
2075
+ "special": true
2076
+ },
2077
+ "151902": {
2078
+ "content": "<reserved_233>",
2079
+ "lstrip": false,
2080
+ "normalized": false,
2081
+ "rstrip": false,
2082
+ "single_word": false,
2083
+ "special": true
2084
+ },
2085
+ "151903": {
2086
+ "content": "<reserved_234>",
2087
+ "lstrip": false,
2088
+ "normalized": false,
2089
+ "rstrip": false,
2090
+ "single_word": false,
2091
+ "special": true
2092
+ },
2093
+ "151904": {
2094
+ "content": "<reserved_235>",
2095
+ "lstrip": false,
2096
+ "normalized": false,
2097
+ "rstrip": false,
2098
+ "single_word": false,
2099
+ "special": true
2100
+ },
2101
+ "151905": {
2102
+ "content": "<reserved_236>",
2103
+ "lstrip": false,
2104
+ "normalized": false,
2105
+ "rstrip": false,
2106
+ "single_word": false,
2107
+ "special": true
2108
+ },
2109
+ "151906": {
2110
+ "content": "<reserved_237>",
2111
+ "lstrip": false,
2112
+ "normalized": false,
2113
+ "rstrip": false,
2114
+ "single_word": false,
2115
+ "special": true
2116
+ },
2117
+ "151907": {
2118
+ "content": "<reserved_238>",
2119
+ "lstrip": false,
2120
+ "normalized": false,
2121
+ "rstrip": false,
2122
+ "single_word": false,
2123
+ "special": true
2124
+ },
2125
+ "151908": {
2126
+ "content": "<reserved_239>",
2127
+ "lstrip": false,
2128
+ "normalized": false,
2129
+ "rstrip": false,
2130
+ "single_word": false,
2131
+ "special": true
2132
+ },
2133
+ "151909": {
2134
+ "content": "<reserved_240>",
2135
+ "lstrip": false,
2136
+ "normalized": false,
2137
+ "rstrip": false,
2138
+ "single_word": false,
2139
+ "special": true
2140
+ },
2141
+ "151910": {
2142
+ "content": "<reserved_241>",
2143
+ "lstrip": false,
2144
+ "normalized": false,
2145
+ "rstrip": false,
2146
+ "single_word": false,
2147
+ "special": true
2148
+ },
2149
+ "151911": {
2150
+ "content": "<reserved_242>",
2151
+ "lstrip": false,
2152
+ "normalized": false,
2153
+ "rstrip": false,
2154
+ "single_word": false,
2155
+ "special": true
2156
+ },
2157
+ "151912": {
2158
+ "content": "<reserved_243>",
2159
+ "lstrip": false,
2160
+ "normalized": false,
2161
+ "rstrip": false,
2162
+ "single_word": false,
2163
+ "special": true
2164
+ },
2165
+ "151913": {
2166
+ "content": "<reserved_244>",
2167
+ "lstrip": false,
2168
+ "normalized": false,
2169
+ "rstrip": false,
2170
+ "single_word": false,
2171
+ "special": true
2172
+ },
2173
+ "151914": {
2174
+ "content": "<reserved_245>",
2175
+ "lstrip": false,
2176
+ "normalized": false,
2177
+ "rstrip": false,
2178
+ "single_word": false,
2179
+ "special": true
2180
+ },
2181
+ "151915": {
2182
+ "content": "<reserved_246>",
2183
+ "lstrip": false,
2184
+ "normalized": false,
2185
+ "rstrip": false,
2186
+ "single_word": false,
2187
+ "special": true
2188
+ },
2189
+ "151916": {
2190
+ "content": "<reserved_247>",
2191
+ "lstrip": false,
2192
+ "normalized": false,
2193
+ "rstrip": false,
2194
+ "single_word": false,
2195
+ "special": true
2196
+ },
2197
+ "151917": {
2198
+ "content": "<reserved_248>",
2199
+ "lstrip": false,
2200
+ "normalized": false,
2201
+ "rstrip": false,
2202
+ "single_word": false,
2203
+ "special": true
2204
+ },
2205
+ "151918": {
2206
+ "content": "<reserved_249>",
2207
+ "lstrip": false,
2208
+ "normalized": false,
2209
+ "rstrip": false,
2210
+ "single_word": false,
2211
+ "special": true
2212
+ },
2213
+ "151919": {
2214
+ "content": "<reserved_250>",
2215
+ "lstrip": false,
2216
+ "normalized": false,
2217
+ "rstrip": false,
2218
+ "single_word": false,
2219
+ "special": true
2220
+ },
2221
+ "151920": {
2222
+ "content": "<reserved_251>",
2223
+ "lstrip": false,
2224
+ "normalized": false,
2225
+ "rstrip": false,
2226
+ "single_word": false,
2227
+ "special": true
2228
+ },
2229
+ "151921": {
2230
+ "content": "<reserved_252>",
2231
+ "lstrip": false,
2232
+ "normalized": false,
2233
+ "rstrip": false,
2234
+ "single_word": false,
2235
+ "special": true
2236
+ },
2237
+ "151922": {
2238
+ "content": "<reserved_253>",
2239
+ "lstrip": false,
2240
+ "normalized": false,
2241
+ "rstrip": false,
2242
+ "single_word": false,
2243
+ "special": true
2244
+ },
2245
+ "151923": {
2246
+ "content": "<reserved_254>",
2247
+ "lstrip": false,
2248
+ "normalized": false,
2249
+ "rstrip": false,
2250
+ "single_word": false,
2251
+ "special": true
2252
+ },
2253
+ "151924": {
2254
+ "content": "<reserved_255>",
2255
+ "lstrip": false,
2256
+ "normalized": false,
2257
+ "rstrip": false,
2258
+ "single_word": false,
2259
+ "special": true
2260
+ },
2261
+ "151925": {
2262
+ "content": "<reserved_256>",
2263
+ "lstrip": false,
2264
+ "normalized": false,
2265
+ "rstrip": false,
2266
+ "single_word": false,
2267
+ "special": true
2268
+ },
2269
+ "151926": {
2270
+ "content": "<reserved_257>",
2271
+ "lstrip": false,
2272
+ "normalized": false,
2273
+ "rstrip": false,
2274
+ "single_word": false,
2275
+ "special": true
2276
+ },
2277
+ "151927": {
2278
+ "content": "<reserved_258>",
2279
+ "lstrip": false,
2280
+ "normalized": false,
2281
+ "rstrip": false,
2282
+ "single_word": false,
2283
+ "special": true
2284
+ },
2285
+ "151928": {
2286
+ "content": "<reserved_259>",
2287
+ "lstrip": false,
2288
+ "normalized": false,
2289
+ "rstrip": false,
2290
+ "single_word": false,
2291
+ "special": true
2292
+ },
2293
+ "151929": {
2294
+ "content": "<reserved_260>",
2295
+ "lstrip": false,
2296
+ "normalized": false,
2297
+ "rstrip": false,
2298
+ "single_word": false,
2299
+ "special": true
2300
+ },
2301
+ "151930": {
2302
+ "content": "<reserved_261>",
2303
+ "lstrip": false,
2304
+ "normalized": false,
2305
+ "rstrip": false,
2306
+ "single_word": false,
2307
+ "special": true
2308
+ },
2309
+ "151931": {
2310
+ "content": "<reserved_262>",
2311
+ "lstrip": false,
2312
+ "normalized": false,
2313
+ "rstrip": false,
2314
+ "single_word": false,
2315
+ "special": true
2316
+ },
2317
+ "151932": {
2318
+ "content": "<reserved_263>",
2319
+ "lstrip": false,
2320
+ "normalized": false,
2321
+ "rstrip": false,
2322
+ "single_word": false,
2323
+ "special": true
2324
+ },
2325
+ "151933": {
2326
+ "content": "<reserved_264>",
2327
+ "lstrip": false,
2328
+ "normalized": false,
2329
+ "rstrip": false,
2330
+ "single_word": false,
2331
+ "special": true
2332
+ },
2333
+ "151934": {
2334
+ "content": "<reserved_265>",
2335
+ "lstrip": false,
2336
+ "normalized": false,
2337
+ "rstrip": false,
2338
+ "single_word": false,
2339
+ "special": true
2340
+ },
2341
+ "151935": {
2342
+ "content": "<reserved_266>",
2343
+ "lstrip": false,
2344
+ "normalized": false,
2345
+ "rstrip": false,
2346
+ "single_word": false,
2347
+ "special": true
2348
  }
2349
  },
2350
  "additional_special_tokens": [