codgician committed on
Commit
91e1872
·
1 Parent(s): 89fa0f8

Upgrade to C4 calibration (1024 samples) for better general-purpose performance

Browse files
README.md CHANGED
@@ -18,8 +18,6 @@ pipeline_tag: image-text-to-text
18
 
19
  # Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled-GPTQ-int4
20
 
21
- > ⚠️ **Caution**: This quantization is not well tested. Use at your own risk and please report any issues.
22
-
23
  This is a **GPTQ INT4 quantized** version of [Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled](https://huggingface.co/Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled).
24
 
25
  Please refer to the original model card for details on the model architecture, training data, and capabilities.
@@ -30,7 +28,7 @@ Please refer to the original model card for details on the model architecture, t
30
 
31
  - **Method**: GPTQ (4-bit INT4, W4A16)
32
  - **Group Size**: 128
33
- - **Calibration**: 200 samples from wikitext
34
  - **Vision Encoder**: Preserved (not quantized)
35
  - **MTP Module**: Preserved (not quantized)
36
 
 
18
 
19
  # Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled-GPTQ-int4
20
 
 
 
21
  This is a **GPTQ INT4 quantized** version of [Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled](https://huggingface.co/Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled).
22
 
23
  Please refer to the original model card for details on the model architecture, training data, and capabilities.
 
28
 
29
  - **Method**: GPTQ (4-bit INT4, W4A16)
30
  - **Group Size**: 128
31
+ - **Calibration**: 1024 samples from C4 dataset
32
  - **Vision Encoder**: Preserved (not quantized)
33
  - **MTP Module**: Preserved (not quantized)
34
 
config.json CHANGED
@@ -3,60 +3,18 @@
3
  "Qwen3_5ForConditionalGeneration"
4
  ],
5
  "bos_token_id": null,
6
- "dtype": "bfloat16",
7
  "eos_token_id": 248046,
8
  "image_token_id": 248056,
9
  "model_name": "qwen/Qwen3.5-27B",
10
  "model_type": "qwen3_5",
11
  "pad_token_id": 248044,
12
- "quantization_config": {
13
- "bits": 4,
14
- "checkpoint_format": "gptq",
15
- "desc_act": false,
16
- "format": "gptq",
17
- "group_size": 128,
18
- "lm_head": false,
19
- "meta": {
20
- "act_group_aware": true,
21
- "auto_forward_data_parallel": true,
22
- "damp_auto_increment": 0.01,
23
- "damp_percent": 0.05,
24
- "failsafe": {
25
- "smooth": null,
26
- "strategy": "rtn",
27
- "threshold": "0.5%"
28
- },
29
- "gc_mode": "interval",
30
- "gptaq": null,
31
- "hessian": {
32
- "chunk_bytes": null,
33
- "chunk_size": null,
34
- "staging_dtype": "float32"
35
- },
36
- "mock_quantization": false,
37
- "mse": 0.0,
38
- "offload_to_disk": false,
39
- "offload_to_disk_path": null,
40
- "pack_impl": "cuda",
41
- "quantizer": [
42
- "gptqmodel:5.8.0"
43
- ],
44
- "static_groups": false,
45
- "true_sequential": true,
46
- "uri": "https://github.com/modelcloud/gptqmodel",
47
- "vram_strategy": "exclusive",
48
- "wait_for_submodule_finalizers": false
49
- },
50
- "pack_dtype": "int32",
51
- "quant_method": "gptq",
52
- "sym": true
53
- },
54
  "text_config": {
55
  "attention_bias": false,
56
  "attention_dropout": 0.0,
57
  "attn_output_gate": true,
58
  "bos_token_id": null,
59
- "dtype": "bfloat16",
60
  "eos_token_id": 248044,
61
  "full_attention_interval": 4,
62
  "head_dim": 256,
@@ -163,14 +121,13 @@
163
  "vocab_size": 248320
164
  },
165
  "tie_word_embeddings": false,
166
- "transformers_version": "5.3.0",
167
  "unsloth_version": "2026.3.3",
168
  "use_cache": false,
169
  "video_token_id": 248057,
170
  "vision_config": {
171
  "deepstack_visual_indexes": [],
172
  "depth": 27,
173
- "dtype": "bfloat16",
174
  "hidden_act": "gelu_pytorch_tanh",
175
  "hidden_size": 1152,
176
  "in_channels": 3,
@@ -185,5 +142,47 @@
185
  "temporal_patch_size": 2
186
  },
187
  "vision_end_token_id": 248054,
188
- "vision_start_token_id": 248053
189
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "Qwen3_5ForConditionalGeneration"
4
  ],
5
  "bos_token_id": null,
6
+ "torch_dtype": "bfloat16",
7
  "eos_token_id": 248046,
8
  "image_token_id": 248056,
9
  "model_name": "qwen/Qwen3.5-27B",
10
  "model_type": "qwen3_5",
11
  "pad_token_id": 248044,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "text_config": {
13
  "attention_bias": false,
14
  "attention_dropout": 0.0,
15
  "attn_output_gate": true,
16
  "bos_token_id": null,
17
+ "torch_dtype": "bfloat16",
18
  "eos_token_id": 248044,
19
  "full_attention_interval": 4,
20
  "head_dim": 256,
 
121
  "vocab_size": 248320
122
  },
123
  "tie_word_embeddings": false,
 
124
  "unsloth_version": "2026.3.3",
125
  "use_cache": false,
126
  "video_token_id": 248057,
127
  "vision_config": {
128
  "deepstack_visual_indexes": [],
129
  "depth": 27,
130
+ "torch_dtype": "bfloat16",
131
  "hidden_act": "gelu_pytorch_tanh",
132
  "hidden_size": 1152,
133
  "in_channels": 3,
 
142
  "temporal_patch_size": 2
143
  },
144
  "vision_end_token_id": 248054,
145
+ "vision_start_token_id": 248053,
146
+ "quantization_config": {
147
+ "bits": 4,
148
+ "checkpoint_format": "gptq",
149
+ "desc_act": false,
150
+ "format": "gptq",
151
+ "group_size": 128,
152
+ "lm_head": false,
153
+ "meta": {
154
+ "act_group_aware": true,
155
+ "auto_forward_data_parallel": true,
156
+ "damp_auto_increment": 0.01,
157
+ "damp_percent": 0.05,
158
+ "failsafe": {
159
+ "smooth": null,
160
+ "strategy": "rtn",
161
+ "threshold": "0.5%"
162
+ },
163
+ "gc_mode": "interval",
164
+ "gptaq": null,
165
+ "hessian": {
166
+ "chunk_bytes": null,
167
+ "chunk_size": null,
168
+ "staging_dtype": "float32"
169
+ },
170
+ "mock_quantization": false,
171
+ "mse": 0.0,
172
+ "offload_to_disk": false,
173
+ "offload_to_disk_path": null,
174
+ "pack_impl": "cpu",
175
+ "quantizer": [
176
+ "gptqmodel:5.8.0"
177
+ ],
178
+ "static_groups": false,
179
+ "true_sequential": true,
180
+ "uri": "https://github.com/modelcloud/gptqmodel",
181
+ "vram_strategy": "exclusive",
182
+ "wait_for_submodule_finalizers": false
183
+ },
184
+ "pack_dtype": "int32",
185
+ "quant_method": "gptq",
186
+ "sym": true
187
+ }
188
+ }
generation_config.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "do_sample": true,
4
- "eos_token_id": 248046,
5
- "pad_token_id": 248044,
6
- "transformers_version": "5.3.0",
7
- "use_cache": false
8
- }
 
 
 
 
 
 
 
 
 
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ec342275d65b96a8f4d6f327b7c9a8bcee14f5f0df6661bfe6417cd2c369f67
3
- size 3516829252
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1bf293c0532d32964a6fc226d6536dd32c890bb5c313a4b3d35aadab152f2a4
3
+ size 2595332032
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cbe24e232f25397656b68a157b9745868b4ddc1f3c44b36616f8f8380ba45eb
3
- size 4278206560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:226401a7b406a545d70c92b6182c44bd9e1eafc8366a05f3f8341ef3f02b668c
3
+ size 4278206312
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01f8770fb7707fd91158ad5118d86bc047347c5e77ae50908e2302b0d2b919c3
3
- size 4258595143
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddf8aae8dad61d9993b67c6e2599e686479a78a05239d98a46bbacfc205a8f5e
3
+ size 4258595176
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6f3e7929e7c21674db12212d3a557e8a0f87b3600c39af62d4a1aff66b79533
3
- size 4284981458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7687a67d0c57f3ac10ff60a29cf6adac7f0f2a060209ad8a3841c7cdf466104
3
+ size 4284981504
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:505357ea6262ab8630bfe13ab74ece02359f2cdf509b9fd41ed63be3b8e8794d
3
- size 2371438154
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f26c195735990ff0d26de40f7c09735e12200d400f2094601185677fb2f4804e
3
+ size 2371438184
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 19559450959
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00002-of-00005.safetensors",
@@ -2054,353 +2054,353 @@
2054
  "model.language_model.layers.9.mlp.up_proj.scales": "model-00003-of-00005.safetensors",
2055
  "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
2056
  "model.language_model.norm.weight": "model-00001-of-00005.safetensors",
2057
- "model.visual.blocks.0.attn.proj.bias": "model-00001-of-00005.safetensors",
2058
- "model.visual.blocks.0.attn.proj.weight": "model-00001-of-00005.safetensors",
2059
- "model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00005.safetensors",
2060
- "model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00005.safetensors",
2061
- "model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2062
- "model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2063
- "model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2064
- "model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2065
- "model.visual.blocks.0.norm1.bias": "model-00001-of-00005.safetensors",
2066
- "model.visual.blocks.0.norm1.weight": "model-00001-of-00005.safetensors",
2067
- "model.visual.blocks.0.norm2.bias": "model-00001-of-00005.safetensors",
2068
- "model.visual.blocks.0.norm2.weight": "model-00001-of-00005.safetensors",
2069
- "model.visual.blocks.1.attn.proj.bias": "model-00001-of-00005.safetensors",
2070
- "model.visual.blocks.1.attn.proj.weight": "model-00001-of-00005.safetensors",
2071
- "model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00005.safetensors",
2072
- "model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00005.safetensors",
2073
- "model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2074
- "model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2075
- "model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2076
- "model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2077
- "model.visual.blocks.1.norm1.bias": "model-00001-of-00005.safetensors",
2078
- "model.visual.blocks.1.norm1.weight": "model-00001-of-00005.safetensors",
2079
- "model.visual.blocks.1.norm2.bias": "model-00001-of-00005.safetensors",
2080
- "model.visual.blocks.1.norm2.weight": "model-00001-of-00005.safetensors",
2081
- "model.visual.blocks.10.attn.proj.bias": "model-00001-of-00005.safetensors",
2082
- "model.visual.blocks.10.attn.proj.weight": "model-00001-of-00005.safetensors",
2083
- "model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00005.safetensors",
2084
- "model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00005.safetensors",
2085
- "model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2086
- "model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2087
- "model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2088
- "model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2089
- "model.visual.blocks.10.norm1.bias": "model-00001-of-00005.safetensors",
2090
- "model.visual.blocks.10.norm1.weight": "model-00001-of-00005.safetensors",
2091
- "model.visual.blocks.10.norm2.bias": "model-00001-of-00005.safetensors",
2092
- "model.visual.blocks.10.norm2.weight": "model-00001-of-00005.safetensors",
2093
- "model.visual.blocks.11.attn.proj.bias": "model-00001-of-00005.safetensors",
2094
- "model.visual.blocks.11.attn.proj.weight": "model-00001-of-00005.safetensors",
2095
- "model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00005.safetensors",
2096
- "model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00005.safetensors",
2097
- "model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2098
- "model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2099
- "model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2100
- "model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2101
- "model.visual.blocks.11.norm1.bias": "model-00001-of-00005.safetensors",
2102
- "model.visual.blocks.11.norm1.weight": "model-00001-of-00005.safetensors",
2103
- "model.visual.blocks.11.norm2.bias": "model-00001-of-00005.safetensors",
2104
- "model.visual.blocks.11.norm2.weight": "model-00001-of-00005.safetensors",
2105
- "model.visual.blocks.12.attn.proj.bias": "model-00001-of-00005.safetensors",
2106
- "model.visual.blocks.12.attn.proj.weight": "model-00001-of-00005.safetensors",
2107
- "model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00005.safetensors",
2108
- "model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00005.safetensors",
2109
- "model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2110
- "model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2111
- "model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2112
- "model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2113
- "model.visual.blocks.12.norm1.bias": "model-00001-of-00005.safetensors",
2114
- "model.visual.blocks.12.norm1.weight": "model-00001-of-00005.safetensors",
2115
- "model.visual.blocks.12.norm2.bias": "model-00001-of-00005.safetensors",
2116
- "model.visual.blocks.12.norm2.weight": "model-00001-of-00005.safetensors",
2117
- "model.visual.blocks.13.attn.proj.bias": "model-00001-of-00005.safetensors",
2118
- "model.visual.blocks.13.attn.proj.weight": "model-00001-of-00005.safetensors",
2119
- "model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00005.safetensors",
2120
- "model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00005.safetensors",
2121
- "model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2122
- "model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2123
- "model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2124
- "model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2125
- "model.visual.blocks.13.norm1.bias": "model-00001-of-00005.safetensors",
2126
- "model.visual.blocks.13.norm1.weight": "model-00001-of-00005.safetensors",
2127
- "model.visual.blocks.13.norm2.bias": "model-00001-of-00005.safetensors",
2128
- "model.visual.blocks.13.norm2.weight": "model-00001-of-00005.safetensors",
2129
- "model.visual.blocks.14.attn.proj.bias": "model-00001-of-00005.safetensors",
2130
- "model.visual.blocks.14.attn.proj.weight": "model-00001-of-00005.safetensors",
2131
- "model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00005.safetensors",
2132
- "model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00005.safetensors",
2133
- "model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2134
- "model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2135
- "model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2136
- "model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2137
- "model.visual.blocks.14.norm1.bias": "model-00001-of-00005.safetensors",
2138
- "model.visual.blocks.14.norm1.weight": "model-00001-of-00005.safetensors",
2139
- "model.visual.blocks.14.norm2.bias": "model-00001-of-00005.safetensors",
2140
- "model.visual.blocks.14.norm2.weight": "model-00001-of-00005.safetensors",
2141
- "model.visual.blocks.15.attn.proj.bias": "model-00001-of-00005.safetensors",
2142
- "model.visual.blocks.15.attn.proj.weight": "model-00001-of-00005.safetensors",
2143
- "model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00005.safetensors",
2144
- "model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00005.safetensors",
2145
- "model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2146
- "model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2147
- "model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2148
- "model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2149
- "model.visual.blocks.15.norm1.bias": "model-00001-of-00005.safetensors",
2150
- "model.visual.blocks.15.norm1.weight": "model-00001-of-00005.safetensors",
2151
- "model.visual.blocks.15.norm2.bias": "model-00001-of-00005.safetensors",
2152
- "model.visual.blocks.15.norm2.weight": "model-00001-of-00005.safetensors",
2153
- "model.visual.blocks.16.attn.proj.bias": "model-00001-of-00005.safetensors",
2154
- "model.visual.blocks.16.attn.proj.weight": "model-00001-of-00005.safetensors",
2155
- "model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00005.safetensors",
2156
- "model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00005.safetensors",
2157
- "model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2158
- "model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2159
- "model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2160
- "model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2161
- "model.visual.blocks.16.norm1.bias": "model-00001-of-00005.safetensors",
2162
- "model.visual.blocks.16.norm1.weight": "model-00001-of-00005.safetensors",
2163
- "model.visual.blocks.16.norm2.bias": "model-00001-of-00005.safetensors",
2164
- "model.visual.blocks.16.norm2.weight": "model-00001-of-00005.safetensors",
2165
- "model.visual.blocks.17.attn.proj.bias": "model-00001-of-00005.safetensors",
2166
- "model.visual.blocks.17.attn.proj.weight": "model-00001-of-00005.safetensors",
2167
- "model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00005.safetensors",
2168
- "model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00005.safetensors",
2169
- "model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2170
- "model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2171
- "model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2172
- "model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2173
- "model.visual.blocks.17.norm1.bias": "model-00001-of-00005.safetensors",
2174
- "model.visual.blocks.17.norm1.weight": "model-00001-of-00005.safetensors",
2175
- "model.visual.blocks.17.norm2.bias": "model-00001-of-00005.safetensors",
2176
- "model.visual.blocks.17.norm2.weight": "model-00001-of-00005.safetensors",
2177
- "model.visual.blocks.18.attn.proj.bias": "model-00001-of-00005.safetensors",
2178
- "model.visual.blocks.18.attn.proj.weight": "model-00001-of-00005.safetensors",
2179
- "model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00005.safetensors",
2180
- "model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00005.safetensors",
2181
- "model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2182
- "model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2183
- "model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2184
- "model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2185
- "model.visual.blocks.18.norm1.bias": "model-00001-of-00005.safetensors",
2186
- "model.visual.blocks.18.norm1.weight": "model-00001-of-00005.safetensors",
2187
- "model.visual.blocks.18.norm2.bias": "model-00001-of-00005.safetensors",
2188
- "model.visual.blocks.18.norm2.weight": "model-00001-of-00005.safetensors",
2189
- "model.visual.blocks.19.attn.proj.bias": "model-00001-of-00005.safetensors",
2190
- "model.visual.blocks.19.attn.proj.weight": "model-00001-of-00005.safetensors",
2191
- "model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00005.safetensors",
2192
- "model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00005.safetensors",
2193
- "model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2194
- "model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2195
- "model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2196
- "model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2197
- "model.visual.blocks.19.norm1.bias": "model-00001-of-00005.safetensors",
2198
- "model.visual.blocks.19.norm1.weight": "model-00001-of-00005.safetensors",
2199
- "model.visual.blocks.19.norm2.bias": "model-00001-of-00005.safetensors",
2200
- "model.visual.blocks.19.norm2.weight": "model-00001-of-00005.safetensors",
2201
- "model.visual.blocks.2.attn.proj.bias": "model-00001-of-00005.safetensors",
2202
- "model.visual.blocks.2.attn.proj.weight": "model-00001-of-00005.safetensors",
2203
- "model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00005.safetensors",
2204
- "model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00005.safetensors",
2205
- "model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2206
- "model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2207
- "model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2208
- "model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2209
- "model.visual.blocks.2.norm1.bias": "model-00001-of-00005.safetensors",
2210
- "model.visual.blocks.2.norm1.weight": "model-00001-of-00005.safetensors",
2211
- "model.visual.blocks.2.norm2.bias": "model-00001-of-00005.safetensors",
2212
- "model.visual.blocks.2.norm2.weight": "model-00001-of-00005.safetensors",
2213
- "model.visual.blocks.20.attn.proj.bias": "model-00001-of-00005.safetensors",
2214
- "model.visual.blocks.20.attn.proj.weight": "model-00001-of-00005.safetensors",
2215
- "model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00005.safetensors",
2216
- "model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00005.safetensors",
2217
- "model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2218
- "model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2219
- "model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2220
- "model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2221
- "model.visual.blocks.20.norm1.bias": "model-00001-of-00005.safetensors",
2222
- "model.visual.blocks.20.norm1.weight": "model-00001-of-00005.safetensors",
2223
- "model.visual.blocks.20.norm2.bias": "model-00001-of-00005.safetensors",
2224
- "model.visual.blocks.20.norm2.weight": "model-00001-of-00005.safetensors",
2225
- "model.visual.blocks.21.attn.proj.bias": "model-00001-of-00005.safetensors",
2226
- "model.visual.blocks.21.attn.proj.weight": "model-00001-of-00005.safetensors",
2227
- "model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00005.safetensors",
2228
- "model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00005.safetensors",
2229
- "model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2230
- "model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2231
- "model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2232
- "model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2233
- "model.visual.blocks.21.norm1.bias": "model-00001-of-00005.safetensors",
2234
- "model.visual.blocks.21.norm1.weight": "model-00001-of-00005.safetensors",
2235
- "model.visual.blocks.21.norm2.bias": "model-00001-of-00005.safetensors",
2236
- "model.visual.blocks.21.norm2.weight": "model-00001-of-00005.safetensors",
2237
- "model.visual.blocks.22.attn.proj.bias": "model-00001-of-00005.safetensors",
2238
- "model.visual.blocks.22.attn.proj.weight": "model-00001-of-00005.safetensors",
2239
- "model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00005.safetensors",
2240
- "model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00005.safetensors",
2241
- "model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2242
- "model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2243
- "model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2244
- "model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2245
- "model.visual.blocks.22.norm1.bias": "model-00001-of-00005.safetensors",
2246
- "model.visual.blocks.22.norm1.weight": "model-00001-of-00005.safetensors",
2247
- "model.visual.blocks.22.norm2.bias": "model-00001-of-00005.safetensors",
2248
- "model.visual.blocks.22.norm2.weight": "model-00001-of-00005.safetensors",
2249
- "model.visual.blocks.23.attn.proj.bias": "model-00001-of-00005.safetensors",
2250
- "model.visual.blocks.23.attn.proj.weight": "model-00001-of-00005.safetensors",
2251
- "model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00005.safetensors",
2252
- "model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00005.safetensors",
2253
- "model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2254
- "model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2255
- "model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2256
- "model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2257
- "model.visual.blocks.23.norm1.bias": "model-00001-of-00005.safetensors",
2258
- "model.visual.blocks.23.norm1.weight": "model-00001-of-00005.safetensors",
2259
- "model.visual.blocks.23.norm2.bias": "model-00001-of-00005.safetensors",
2260
- "model.visual.blocks.23.norm2.weight": "model-00001-of-00005.safetensors",
2261
- "model.visual.blocks.24.attn.proj.bias": "model-00001-of-00005.safetensors",
2262
- "model.visual.blocks.24.attn.proj.weight": "model-00001-of-00005.safetensors",
2263
- "model.visual.blocks.24.attn.qkv.bias": "model-00001-of-00005.safetensors",
2264
- "model.visual.blocks.24.attn.qkv.weight": "model-00001-of-00005.safetensors",
2265
- "model.visual.blocks.24.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2266
- "model.visual.blocks.24.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2267
- "model.visual.blocks.24.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2268
- "model.visual.blocks.24.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2269
- "model.visual.blocks.24.norm1.bias": "model-00001-of-00005.safetensors",
2270
- "model.visual.blocks.24.norm1.weight": "model-00001-of-00005.safetensors",
2271
- "model.visual.blocks.24.norm2.bias": "model-00001-of-00005.safetensors",
2272
- "model.visual.blocks.24.norm2.weight": "model-00001-of-00005.safetensors",
2273
- "model.visual.blocks.25.attn.proj.bias": "model-00001-of-00005.safetensors",
2274
- "model.visual.blocks.25.attn.proj.weight": "model-00001-of-00005.safetensors",
2275
- "model.visual.blocks.25.attn.qkv.bias": "model-00001-of-00005.safetensors",
2276
- "model.visual.blocks.25.attn.qkv.weight": "model-00001-of-00005.safetensors",
2277
- "model.visual.blocks.25.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2278
- "model.visual.blocks.25.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2279
- "model.visual.blocks.25.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2280
- "model.visual.blocks.25.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2281
- "model.visual.blocks.25.norm1.bias": "model-00001-of-00005.safetensors",
2282
- "model.visual.blocks.25.norm1.weight": "model-00001-of-00005.safetensors",
2283
- "model.visual.blocks.25.norm2.bias": "model-00001-of-00005.safetensors",
2284
- "model.visual.blocks.25.norm2.weight": "model-00001-of-00005.safetensors",
2285
- "model.visual.blocks.26.attn.proj.bias": "model-00001-of-00005.safetensors",
2286
- "model.visual.blocks.26.attn.proj.weight": "model-00001-of-00005.safetensors",
2287
- "model.visual.blocks.26.attn.qkv.bias": "model-00001-of-00005.safetensors",
2288
- "model.visual.blocks.26.attn.qkv.weight": "model-00001-of-00005.safetensors",
2289
- "model.visual.blocks.26.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2290
- "model.visual.blocks.26.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2291
- "model.visual.blocks.26.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2292
- "model.visual.blocks.26.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2293
- "model.visual.blocks.26.norm1.bias": "model-00001-of-00005.safetensors",
2294
- "model.visual.blocks.26.norm1.weight": "model-00001-of-00005.safetensors",
2295
- "model.visual.blocks.26.norm2.bias": "model-00001-of-00005.safetensors",
2296
- "model.visual.blocks.26.norm2.weight": "model-00001-of-00005.safetensors",
2297
- "model.visual.blocks.3.attn.proj.bias": "model-00001-of-00005.safetensors",
2298
- "model.visual.blocks.3.attn.proj.weight": "model-00001-of-00005.safetensors",
2299
- "model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00005.safetensors",
2300
- "model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00005.safetensors",
2301
- "model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2302
- "model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2303
- "model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2304
- "model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2305
- "model.visual.blocks.3.norm1.bias": "model-00001-of-00005.safetensors",
2306
- "model.visual.blocks.3.norm1.weight": "model-00001-of-00005.safetensors",
2307
- "model.visual.blocks.3.norm2.bias": "model-00001-of-00005.safetensors",
2308
- "model.visual.blocks.3.norm2.weight": "model-00001-of-00005.safetensors",
2309
- "model.visual.blocks.4.attn.proj.bias": "model-00001-of-00005.safetensors",
2310
- "model.visual.blocks.4.attn.proj.weight": "model-00001-of-00005.safetensors",
2311
- "model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00005.safetensors",
2312
- "model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00005.safetensors",
2313
- "model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2314
- "model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2315
- "model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2316
- "model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2317
- "model.visual.blocks.4.norm1.bias": "model-00001-of-00005.safetensors",
2318
- "model.visual.blocks.4.norm1.weight": "model-00001-of-00005.safetensors",
2319
- "model.visual.blocks.4.norm2.bias": "model-00001-of-00005.safetensors",
2320
- "model.visual.blocks.4.norm2.weight": "model-00001-of-00005.safetensors",
2321
- "model.visual.blocks.5.attn.proj.bias": "model-00001-of-00005.safetensors",
2322
- "model.visual.blocks.5.attn.proj.weight": "model-00001-of-00005.safetensors",
2323
- "model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00005.safetensors",
2324
- "model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00005.safetensors",
2325
- "model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2326
- "model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2327
- "model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2328
- "model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2329
- "model.visual.blocks.5.norm1.bias": "model-00001-of-00005.safetensors",
2330
- "model.visual.blocks.5.norm1.weight": "model-00001-of-00005.safetensors",
2331
- "model.visual.blocks.5.norm2.bias": "model-00001-of-00005.safetensors",
2332
- "model.visual.blocks.5.norm2.weight": "model-00001-of-00005.safetensors",
2333
- "model.visual.blocks.6.attn.proj.bias": "model-00001-of-00005.safetensors",
2334
- "model.visual.blocks.6.attn.proj.weight": "model-00001-of-00005.safetensors",
2335
- "model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00005.safetensors",
2336
- "model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00005.safetensors",
2337
- "model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2338
- "model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2339
- "model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2340
- "model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2341
- "model.visual.blocks.6.norm1.bias": "model-00001-of-00005.safetensors",
2342
- "model.visual.blocks.6.norm1.weight": "model-00001-of-00005.safetensors",
2343
- "model.visual.blocks.6.norm2.bias": "model-00001-of-00005.safetensors",
2344
- "model.visual.blocks.6.norm2.weight": "model-00001-of-00005.safetensors",
2345
- "model.visual.blocks.7.attn.proj.bias": "model-00001-of-00005.safetensors",
2346
- "model.visual.blocks.7.attn.proj.weight": "model-00001-of-00005.safetensors",
2347
- "model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00005.safetensors",
2348
- "model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00005.safetensors",
2349
- "model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2350
- "model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2351
- "model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2352
- "model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2353
- "model.visual.blocks.7.norm1.bias": "model-00001-of-00005.safetensors",
2354
- "model.visual.blocks.7.norm1.weight": "model-00001-of-00005.safetensors",
2355
- "model.visual.blocks.7.norm2.bias": "model-00001-of-00005.safetensors",
2356
- "model.visual.blocks.7.norm2.weight": "model-00001-of-00005.safetensors",
2357
- "model.visual.blocks.8.attn.proj.bias": "model-00001-of-00005.safetensors",
2358
- "model.visual.blocks.8.attn.proj.weight": "model-00001-of-00005.safetensors",
2359
- "model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00005.safetensors",
2360
- "model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00005.safetensors",
2361
- "model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2362
- "model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2363
- "model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2364
- "model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2365
- "model.visual.blocks.8.norm1.bias": "model-00001-of-00005.safetensors",
2366
- "model.visual.blocks.8.norm1.weight": "model-00001-of-00005.safetensors",
2367
- "model.visual.blocks.8.norm2.bias": "model-00001-of-00005.safetensors",
2368
- "model.visual.blocks.8.norm2.weight": "model-00001-of-00005.safetensors",
2369
- "model.visual.blocks.9.attn.proj.bias": "model-00001-of-00005.safetensors",
2370
- "model.visual.blocks.9.attn.proj.weight": "model-00001-of-00005.safetensors",
2371
- "model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00005.safetensors",
2372
- "model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00005.safetensors",
2373
- "model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00005.safetensors",
2374
- "model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00005.safetensors",
2375
- "model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00005.safetensors",
2376
- "model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00005.safetensors",
2377
- "model.visual.blocks.9.norm1.bias": "model-00001-of-00005.safetensors",
2378
- "model.visual.blocks.9.norm1.weight": "model-00001-of-00005.safetensors",
2379
- "model.visual.blocks.9.norm2.bias": "model-00001-of-00005.safetensors",
2380
- "model.visual.blocks.9.norm2.weight": "model-00001-of-00005.safetensors",
2381
- "model.visual.merger.linear_fc1.bias": "model-00001-of-00005.safetensors",
2382
- "model.visual.merger.linear_fc1.weight": "model-00001-of-00005.safetensors",
2383
- "model.visual.merger.linear_fc2.bias": "model-00001-of-00005.safetensors",
2384
- "model.visual.merger.linear_fc2.weight": "model-00001-of-00005.safetensors",
2385
- "model.visual.merger.norm.bias": "model-00001-of-00005.safetensors",
2386
- "model.visual.merger.norm.weight": "model-00001-of-00005.safetensors",
2387
- "model.visual.patch_embed.proj.bias": "model-00001-of-00005.safetensors",
2388
- "model.visual.patch_embed.proj.weight": "model-00001-of-00005.safetensors",
2389
- "model.visual.pos_embed.weight": "model-00001-of-00005.safetensors",
2390
- "mtp.fc.weight": "mtp.safetensors",
2391
- "mtp.layers.0.input_layernorm.weight": "mtp.safetensors",
2392
- "mtp.layers.0.mlp.down_proj.weight": "mtp.safetensors",
2393
- "mtp.layers.0.mlp.gate_proj.weight": "mtp.safetensors",
2394
- "mtp.layers.0.mlp.up_proj.weight": "mtp.safetensors",
2395
- "mtp.layers.0.post_attention_layernorm.weight": "mtp.safetensors",
2396
- "mtp.layers.0.self_attn.k_norm.weight": "mtp.safetensors",
2397
- "mtp.layers.0.self_attn.k_proj.weight": "mtp.safetensors",
2398
- "mtp.layers.0.self_attn.o_proj.weight": "mtp.safetensors",
2399
- "mtp.layers.0.self_attn.q_norm.weight": "mtp.safetensors",
2400
- "mtp.layers.0.self_attn.q_proj.weight": "mtp.safetensors",
2401
- "mtp.layers.0.self_attn.v_proj.weight": "mtp.safetensors",
2402
- "mtp.norm.weight": "mtp.safetensors",
2403
- "mtp.pre_fc_norm_embedding.weight": "mtp.safetensors",
2404
- "mtp.pre_fc_norm_hidden.weight": "mtp.safetensors"
2405
  }
2406
  }
 
1
  {
2
  "metadata": {
3
+ "total_size": 19559420239
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00002-of-00005.safetensors",
 
2054
  "model.language_model.layers.9.mlp.up_proj.scales": "model-00003-of-00005.safetensors",
2055
  "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
2056
  "model.language_model.norm.weight": "model-00001-of-00005.safetensors",
2057
+ "model.visual.blocks.0.attn.proj.bias": "visual_mtp_weights.safetensors",
2058
+ "model.visual.blocks.0.attn.proj.weight": "visual_mtp_weights.safetensors",
2059
+ "model.visual.blocks.0.attn.qkv.bias": "visual_mtp_weights.safetensors",
2060
+ "model.visual.blocks.0.attn.qkv.weight": "visual_mtp_weights.safetensors",
2061
+ "model.visual.blocks.0.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2062
+ "model.visual.blocks.0.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2063
+ "model.visual.blocks.0.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2064
+ "model.visual.blocks.0.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2065
+ "model.visual.blocks.0.norm1.bias": "visual_mtp_weights.safetensors",
2066
+ "model.visual.blocks.0.norm1.weight": "visual_mtp_weights.safetensors",
2067
+ "model.visual.blocks.0.norm2.bias": "visual_mtp_weights.safetensors",
2068
+ "model.visual.blocks.0.norm2.weight": "visual_mtp_weights.safetensors",
2069
+ "model.visual.blocks.1.attn.proj.bias": "visual_mtp_weights.safetensors",
2070
+ "model.visual.blocks.1.attn.proj.weight": "visual_mtp_weights.safetensors",
2071
+ "model.visual.blocks.1.attn.qkv.bias": "visual_mtp_weights.safetensors",
2072
+ "model.visual.blocks.1.attn.qkv.weight": "visual_mtp_weights.safetensors",
2073
+ "model.visual.blocks.1.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2074
+ "model.visual.blocks.1.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2075
+ "model.visual.blocks.1.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2076
+ "model.visual.blocks.1.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2077
+ "model.visual.blocks.1.norm1.bias": "visual_mtp_weights.safetensors",
2078
+ "model.visual.blocks.1.norm1.weight": "visual_mtp_weights.safetensors",
2079
+ "model.visual.blocks.1.norm2.bias": "visual_mtp_weights.safetensors",
2080
+ "model.visual.blocks.1.norm2.weight": "visual_mtp_weights.safetensors",
2081
+ "model.visual.blocks.10.attn.proj.bias": "visual_mtp_weights.safetensors",
2082
+ "model.visual.blocks.10.attn.proj.weight": "visual_mtp_weights.safetensors",
2083
+ "model.visual.blocks.10.attn.qkv.bias": "visual_mtp_weights.safetensors",
2084
+ "model.visual.blocks.10.attn.qkv.weight": "visual_mtp_weights.safetensors",
2085
+ "model.visual.blocks.10.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2086
+ "model.visual.blocks.10.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2087
+ "model.visual.blocks.10.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2088
+ "model.visual.blocks.10.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2089
+ "model.visual.blocks.10.norm1.bias": "visual_mtp_weights.safetensors",
2090
+ "model.visual.blocks.10.norm1.weight": "visual_mtp_weights.safetensors",
2091
+ "model.visual.blocks.10.norm2.bias": "visual_mtp_weights.safetensors",
2092
+ "model.visual.blocks.10.norm2.weight": "visual_mtp_weights.safetensors",
2093
+ "model.visual.blocks.11.attn.proj.bias": "visual_mtp_weights.safetensors",
2094
+ "model.visual.blocks.11.attn.proj.weight": "visual_mtp_weights.safetensors",
2095
+ "model.visual.blocks.11.attn.qkv.bias": "visual_mtp_weights.safetensors",
2096
+ "model.visual.blocks.11.attn.qkv.weight": "visual_mtp_weights.safetensors",
2097
+ "model.visual.blocks.11.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2098
+ "model.visual.blocks.11.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2099
+ "model.visual.blocks.11.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2100
+ "model.visual.blocks.11.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2101
+ "model.visual.blocks.11.norm1.bias": "visual_mtp_weights.safetensors",
2102
+ "model.visual.blocks.11.norm1.weight": "visual_mtp_weights.safetensors",
2103
+ "model.visual.blocks.11.norm2.bias": "visual_mtp_weights.safetensors",
2104
+ "model.visual.blocks.11.norm2.weight": "visual_mtp_weights.safetensors",
2105
+ "model.visual.blocks.12.attn.proj.bias": "visual_mtp_weights.safetensors",
2106
+ "model.visual.blocks.12.attn.proj.weight": "visual_mtp_weights.safetensors",
2107
+ "model.visual.blocks.12.attn.qkv.bias": "visual_mtp_weights.safetensors",
2108
+ "model.visual.blocks.12.attn.qkv.weight": "visual_mtp_weights.safetensors",
2109
+ "model.visual.blocks.12.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2110
+ "model.visual.blocks.12.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2111
+ "model.visual.blocks.12.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2112
+ "model.visual.blocks.12.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2113
+ "model.visual.blocks.12.norm1.bias": "visual_mtp_weights.safetensors",
2114
+ "model.visual.blocks.12.norm1.weight": "visual_mtp_weights.safetensors",
2115
+ "model.visual.blocks.12.norm2.bias": "visual_mtp_weights.safetensors",
2116
+ "model.visual.blocks.12.norm2.weight": "visual_mtp_weights.safetensors",
2117
+ "model.visual.blocks.13.attn.proj.bias": "visual_mtp_weights.safetensors",
2118
+ "model.visual.blocks.13.attn.proj.weight": "visual_mtp_weights.safetensors",
2119
+ "model.visual.blocks.13.attn.qkv.bias": "visual_mtp_weights.safetensors",
2120
+ "model.visual.blocks.13.attn.qkv.weight": "visual_mtp_weights.safetensors",
2121
+ "model.visual.blocks.13.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2122
+ "model.visual.blocks.13.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2123
+ "model.visual.blocks.13.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2124
+ "model.visual.blocks.13.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2125
+ "model.visual.blocks.13.norm1.bias": "visual_mtp_weights.safetensors",
2126
+ "model.visual.blocks.13.norm1.weight": "visual_mtp_weights.safetensors",
2127
+ "model.visual.blocks.13.norm2.bias": "visual_mtp_weights.safetensors",
2128
+ "model.visual.blocks.13.norm2.weight": "visual_mtp_weights.safetensors",
2129
+ "model.visual.blocks.14.attn.proj.bias": "visual_mtp_weights.safetensors",
2130
+ "model.visual.blocks.14.attn.proj.weight": "visual_mtp_weights.safetensors",
2131
+ "model.visual.blocks.14.attn.qkv.bias": "visual_mtp_weights.safetensors",
2132
+ "model.visual.blocks.14.attn.qkv.weight": "visual_mtp_weights.safetensors",
2133
+ "model.visual.blocks.14.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2134
+ "model.visual.blocks.14.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2135
+ "model.visual.blocks.14.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2136
+ "model.visual.blocks.14.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2137
+ "model.visual.blocks.14.norm1.bias": "visual_mtp_weights.safetensors",
2138
+ "model.visual.blocks.14.norm1.weight": "visual_mtp_weights.safetensors",
2139
+ "model.visual.blocks.14.norm2.bias": "visual_mtp_weights.safetensors",
2140
+ "model.visual.blocks.14.norm2.weight": "visual_mtp_weights.safetensors",
2141
+ "model.visual.blocks.15.attn.proj.bias": "visual_mtp_weights.safetensors",
2142
+ "model.visual.blocks.15.attn.proj.weight": "visual_mtp_weights.safetensors",
2143
+ "model.visual.blocks.15.attn.qkv.bias": "visual_mtp_weights.safetensors",
2144
+ "model.visual.blocks.15.attn.qkv.weight": "visual_mtp_weights.safetensors",
2145
+ "model.visual.blocks.15.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2146
+ "model.visual.blocks.15.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2147
+ "model.visual.blocks.15.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2148
+ "model.visual.blocks.15.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2149
+ "model.visual.blocks.15.norm1.bias": "visual_mtp_weights.safetensors",
2150
+ "model.visual.blocks.15.norm1.weight": "visual_mtp_weights.safetensors",
2151
+ "model.visual.blocks.15.norm2.bias": "visual_mtp_weights.safetensors",
2152
+ "model.visual.blocks.15.norm2.weight": "visual_mtp_weights.safetensors",
2153
+ "model.visual.blocks.16.attn.proj.bias": "visual_mtp_weights.safetensors",
2154
+ "model.visual.blocks.16.attn.proj.weight": "visual_mtp_weights.safetensors",
2155
+ "model.visual.blocks.16.attn.qkv.bias": "visual_mtp_weights.safetensors",
2156
+ "model.visual.blocks.16.attn.qkv.weight": "visual_mtp_weights.safetensors",
2157
+ "model.visual.blocks.16.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2158
+ "model.visual.blocks.16.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2159
+ "model.visual.blocks.16.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2160
+ "model.visual.blocks.16.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2161
+ "model.visual.blocks.16.norm1.bias": "visual_mtp_weights.safetensors",
2162
+ "model.visual.blocks.16.norm1.weight": "visual_mtp_weights.safetensors",
2163
+ "model.visual.blocks.16.norm2.bias": "visual_mtp_weights.safetensors",
2164
+ "model.visual.blocks.16.norm2.weight": "visual_mtp_weights.safetensors",
2165
+ "model.visual.blocks.17.attn.proj.bias": "visual_mtp_weights.safetensors",
2166
+ "model.visual.blocks.17.attn.proj.weight": "visual_mtp_weights.safetensors",
2167
+ "model.visual.blocks.17.attn.qkv.bias": "visual_mtp_weights.safetensors",
2168
+ "model.visual.blocks.17.attn.qkv.weight": "visual_mtp_weights.safetensors",
2169
+ "model.visual.blocks.17.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2170
+ "model.visual.blocks.17.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2171
+ "model.visual.blocks.17.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2172
+ "model.visual.blocks.17.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2173
+ "model.visual.blocks.17.norm1.bias": "visual_mtp_weights.safetensors",
2174
+ "model.visual.blocks.17.norm1.weight": "visual_mtp_weights.safetensors",
2175
+ "model.visual.blocks.17.norm2.bias": "visual_mtp_weights.safetensors",
2176
+ "model.visual.blocks.17.norm2.weight": "visual_mtp_weights.safetensors",
2177
+ "model.visual.blocks.18.attn.proj.bias": "visual_mtp_weights.safetensors",
2178
+ "model.visual.blocks.18.attn.proj.weight": "visual_mtp_weights.safetensors",
2179
+ "model.visual.blocks.18.attn.qkv.bias": "visual_mtp_weights.safetensors",
2180
+ "model.visual.blocks.18.attn.qkv.weight": "visual_mtp_weights.safetensors",
2181
+ "model.visual.blocks.18.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2182
+ "model.visual.blocks.18.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2183
+ "model.visual.blocks.18.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2184
+ "model.visual.blocks.18.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2185
+ "model.visual.blocks.18.norm1.bias": "visual_mtp_weights.safetensors",
2186
+ "model.visual.blocks.18.norm1.weight": "visual_mtp_weights.safetensors",
2187
+ "model.visual.blocks.18.norm2.bias": "visual_mtp_weights.safetensors",
2188
+ "model.visual.blocks.18.norm2.weight": "visual_mtp_weights.safetensors",
2189
+ "model.visual.blocks.19.attn.proj.bias": "visual_mtp_weights.safetensors",
2190
+ "model.visual.blocks.19.attn.proj.weight": "visual_mtp_weights.safetensors",
2191
+ "model.visual.blocks.19.attn.qkv.bias": "visual_mtp_weights.safetensors",
2192
+ "model.visual.blocks.19.attn.qkv.weight": "visual_mtp_weights.safetensors",
2193
+ "model.visual.blocks.19.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2194
+ "model.visual.blocks.19.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2195
+ "model.visual.blocks.19.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2196
+ "model.visual.blocks.19.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2197
+ "model.visual.blocks.19.norm1.bias": "visual_mtp_weights.safetensors",
2198
+ "model.visual.blocks.19.norm1.weight": "visual_mtp_weights.safetensors",
2199
+ "model.visual.blocks.19.norm2.bias": "visual_mtp_weights.safetensors",
2200
+ "model.visual.blocks.19.norm2.weight": "visual_mtp_weights.safetensors",
2201
+ "model.visual.blocks.2.attn.proj.bias": "visual_mtp_weights.safetensors",
2202
+ "model.visual.blocks.2.attn.proj.weight": "visual_mtp_weights.safetensors",
2203
+ "model.visual.blocks.2.attn.qkv.bias": "visual_mtp_weights.safetensors",
2204
+ "model.visual.blocks.2.attn.qkv.weight": "visual_mtp_weights.safetensors",
2205
+ "model.visual.blocks.2.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2206
+ "model.visual.blocks.2.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2207
+ "model.visual.blocks.2.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2208
+ "model.visual.blocks.2.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2209
+ "model.visual.blocks.2.norm1.bias": "visual_mtp_weights.safetensors",
2210
+ "model.visual.blocks.2.norm1.weight": "visual_mtp_weights.safetensors",
2211
+ "model.visual.blocks.2.norm2.bias": "visual_mtp_weights.safetensors",
2212
+ "model.visual.blocks.2.norm2.weight": "visual_mtp_weights.safetensors",
2213
+ "model.visual.blocks.20.attn.proj.bias": "visual_mtp_weights.safetensors",
2214
+ "model.visual.blocks.20.attn.proj.weight": "visual_mtp_weights.safetensors",
2215
+ "model.visual.blocks.20.attn.qkv.bias": "visual_mtp_weights.safetensors",
2216
+ "model.visual.blocks.20.attn.qkv.weight": "visual_mtp_weights.safetensors",
2217
+ "model.visual.blocks.20.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2218
+ "model.visual.blocks.20.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2219
+ "model.visual.blocks.20.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2220
+ "model.visual.blocks.20.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2221
+ "model.visual.blocks.20.norm1.bias": "visual_mtp_weights.safetensors",
2222
+ "model.visual.blocks.20.norm1.weight": "visual_mtp_weights.safetensors",
2223
+ "model.visual.blocks.20.norm2.bias": "visual_mtp_weights.safetensors",
2224
+ "model.visual.blocks.20.norm2.weight": "visual_mtp_weights.safetensors",
2225
+ "model.visual.blocks.21.attn.proj.bias": "visual_mtp_weights.safetensors",
2226
+ "model.visual.blocks.21.attn.proj.weight": "visual_mtp_weights.safetensors",
2227
+ "model.visual.blocks.21.attn.qkv.bias": "visual_mtp_weights.safetensors",
2228
+ "model.visual.blocks.21.attn.qkv.weight": "visual_mtp_weights.safetensors",
2229
+ "model.visual.blocks.21.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2230
+ "model.visual.blocks.21.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2231
+ "model.visual.blocks.21.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2232
+ "model.visual.blocks.21.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2233
+ "model.visual.blocks.21.norm1.bias": "visual_mtp_weights.safetensors",
2234
+ "model.visual.blocks.21.norm1.weight": "visual_mtp_weights.safetensors",
2235
+ "model.visual.blocks.21.norm2.bias": "visual_mtp_weights.safetensors",
2236
+ "model.visual.blocks.21.norm2.weight": "visual_mtp_weights.safetensors",
2237
+ "model.visual.blocks.22.attn.proj.bias": "visual_mtp_weights.safetensors",
2238
+ "model.visual.blocks.22.attn.proj.weight": "visual_mtp_weights.safetensors",
2239
+ "model.visual.blocks.22.attn.qkv.bias": "visual_mtp_weights.safetensors",
2240
+ "model.visual.blocks.22.attn.qkv.weight": "visual_mtp_weights.safetensors",
2241
+ "model.visual.blocks.22.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2242
+ "model.visual.blocks.22.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2243
+ "model.visual.blocks.22.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2244
+ "model.visual.blocks.22.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2245
+ "model.visual.blocks.22.norm1.bias": "visual_mtp_weights.safetensors",
2246
+ "model.visual.blocks.22.norm1.weight": "visual_mtp_weights.safetensors",
2247
+ "model.visual.blocks.22.norm2.bias": "visual_mtp_weights.safetensors",
2248
+ "model.visual.blocks.22.norm2.weight": "visual_mtp_weights.safetensors",
2249
+ "model.visual.blocks.23.attn.proj.bias": "visual_mtp_weights.safetensors",
2250
+ "model.visual.blocks.23.attn.proj.weight": "visual_mtp_weights.safetensors",
2251
+ "model.visual.blocks.23.attn.qkv.bias": "visual_mtp_weights.safetensors",
2252
+ "model.visual.blocks.23.attn.qkv.weight": "visual_mtp_weights.safetensors",
2253
+ "model.visual.blocks.23.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2254
+ "model.visual.blocks.23.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2255
+ "model.visual.blocks.23.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2256
+ "model.visual.blocks.23.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2257
+ "model.visual.blocks.23.norm1.bias": "visual_mtp_weights.safetensors",
2258
+ "model.visual.blocks.23.norm1.weight": "visual_mtp_weights.safetensors",
2259
+ "model.visual.blocks.23.norm2.bias": "visual_mtp_weights.safetensors",
2260
+ "model.visual.blocks.23.norm2.weight": "visual_mtp_weights.safetensors",
2261
+ "model.visual.blocks.24.attn.proj.bias": "visual_mtp_weights.safetensors",
2262
+ "model.visual.blocks.24.attn.proj.weight": "visual_mtp_weights.safetensors",
2263
+ "model.visual.blocks.24.attn.qkv.bias": "visual_mtp_weights.safetensors",
2264
+ "model.visual.blocks.24.attn.qkv.weight": "visual_mtp_weights.safetensors",
2265
+ "model.visual.blocks.24.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2266
+ "model.visual.blocks.24.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2267
+ "model.visual.blocks.24.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2268
+ "model.visual.blocks.24.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2269
+ "model.visual.blocks.24.norm1.bias": "visual_mtp_weights.safetensors",
2270
+ "model.visual.blocks.24.norm1.weight": "visual_mtp_weights.safetensors",
2271
+ "model.visual.blocks.24.norm2.bias": "visual_mtp_weights.safetensors",
2272
+ "model.visual.blocks.24.norm2.weight": "visual_mtp_weights.safetensors",
2273
+ "model.visual.blocks.25.attn.proj.bias": "visual_mtp_weights.safetensors",
2274
+ "model.visual.blocks.25.attn.proj.weight": "visual_mtp_weights.safetensors",
2275
+ "model.visual.blocks.25.attn.qkv.bias": "visual_mtp_weights.safetensors",
2276
+ "model.visual.blocks.25.attn.qkv.weight": "visual_mtp_weights.safetensors",
2277
+ "model.visual.blocks.25.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2278
+ "model.visual.blocks.25.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2279
+ "model.visual.blocks.25.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2280
+ "model.visual.blocks.25.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2281
+ "model.visual.blocks.25.norm1.bias": "visual_mtp_weights.safetensors",
2282
+ "model.visual.blocks.25.norm1.weight": "visual_mtp_weights.safetensors",
2283
+ "model.visual.blocks.25.norm2.bias": "visual_mtp_weights.safetensors",
2284
+ "model.visual.blocks.25.norm2.weight": "visual_mtp_weights.safetensors",
2285
+ "model.visual.blocks.26.attn.proj.bias": "visual_mtp_weights.safetensors",
2286
+ "model.visual.blocks.26.attn.proj.weight": "visual_mtp_weights.safetensors",
2287
+ "model.visual.blocks.26.attn.qkv.bias": "visual_mtp_weights.safetensors",
2288
+ "model.visual.blocks.26.attn.qkv.weight": "visual_mtp_weights.safetensors",
2289
+ "model.visual.blocks.26.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2290
+ "model.visual.blocks.26.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2291
+ "model.visual.blocks.26.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2292
+ "model.visual.blocks.26.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2293
+ "model.visual.blocks.26.norm1.bias": "visual_mtp_weights.safetensors",
2294
+ "model.visual.blocks.26.norm1.weight": "visual_mtp_weights.safetensors",
2295
+ "model.visual.blocks.26.norm2.bias": "visual_mtp_weights.safetensors",
2296
+ "model.visual.blocks.26.norm2.weight": "visual_mtp_weights.safetensors",
2297
+ "model.visual.blocks.3.attn.proj.bias": "visual_mtp_weights.safetensors",
2298
+ "model.visual.blocks.3.attn.proj.weight": "visual_mtp_weights.safetensors",
2299
+ "model.visual.blocks.3.attn.qkv.bias": "visual_mtp_weights.safetensors",
2300
+ "model.visual.blocks.3.attn.qkv.weight": "visual_mtp_weights.safetensors",
2301
+ "model.visual.blocks.3.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2302
+ "model.visual.blocks.3.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2303
+ "model.visual.blocks.3.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2304
+ "model.visual.blocks.3.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2305
+ "model.visual.blocks.3.norm1.bias": "visual_mtp_weights.safetensors",
2306
+ "model.visual.blocks.3.norm1.weight": "visual_mtp_weights.safetensors",
2307
+ "model.visual.blocks.3.norm2.bias": "visual_mtp_weights.safetensors",
2308
+ "model.visual.blocks.3.norm2.weight": "visual_mtp_weights.safetensors",
2309
+ "model.visual.blocks.4.attn.proj.bias": "visual_mtp_weights.safetensors",
2310
+ "model.visual.blocks.4.attn.proj.weight": "visual_mtp_weights.safetensors",
2311
+ "model.visual.blocks.4.attn.qkv.bias": "visual_mtp_weights.safetensors",
2312
+ "model.visual.blocks.4.attn.qkv.weight": "visual_mtp_weights.safetensors",
2313
+ "model.visual.blocks.4.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2314
+ "model.visual.blocks.4.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2315
+ "model.visual.blocks.4.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2316
+ "model.visual.blocks.4.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2317
+ "model.visual.blocks.4.norm1.bias": "visual_mtp_weights.safetensors",
2318
+ "model.visual.blocks.4.norm1.weight": "visual_mtp_weights.safetensors",
2319
+ "model.visual.blocks.4.norm2.bias": "visual_mtp_weights.safetensors",
2320
+ "model.visual.blocks.4.norm2.weight": "visual_mtp_weights.safetensors",
2321
+ "model.visual.blocks.5.attn.proj.bias": "visual_mtp_weights.safetensors",
2322
+ "model.visual.blocks.5.attn.proj.weight": "visual_mtp_weights.safetensors",
2323
+ "model.visual.blocks.5.attn.qkv.bias": "visual_mtp_weights.safetensors",
2324
+ "model.visual.blocks.5.attn.qkv.weight": "visual_mtp_weights.safetensors",
2325
+ "model.visual.blocks.5.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2326
+ "model.visual.blocks.5.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2327
+ "model.visual.blocks.5.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2328
+ "model.visual.blocks.5.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2329
+ "model.visual.blocks.5.norm1.bias": "visual_mtp_weights.safetensors",
2330
+ "model.visual.blocks.5.norm1.weight": "visual_mtp_weights.safetensors",
2331
+ "model.visual.blocks.5.norm2.bias": "visual_mtp_weights.safetensors",
2332
+ "model.visual.blocks.5.norm2.weight": "visual_mtp_weights.safetensors",
2333
+ "model.visual.blocks.6.attn.proj.bias": "visual_mtp_weights.safetensors",
2334
+ "model.visual.blocks.6.attn.proj.weight": "visual_mtp_weights.safetensors",
2335
+ "model.visual.blocks.6.attn.qkv.bias": "visual_mtp_weights.safetensors",
2336
+ "model.visual.blocks.6.attn.qkv.weight": "visual_mtp_weights.safetensors",
2337
+ "model.visual.blocks.6.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2338
+ "model.visual.blocks.6.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2339
+ "model.visual.blocks.6.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2340
+ "model.visual.blocks.6.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2341
+ "model.visual.blocks.6.norm1.bias": "visual_mtp_weights.safetensors",
2342
+ "model.visual.blocks.6.norm1.weight": "visual_mtp_weights.safetensors",
2343
+ "model.visual.blocks.6.norm2.bias": "visual_mtp_weights.safetensors",
2344
+ "model.visual.blocks.6.norm2.weight": "visual_mtp_weights.safetensors",
2345
+ "model.visual.blocks.7.attn.proj.bias": "visual_mtp_weights.safetensors",
2346
+ "model.visual.blocks.7.attn.proj.weight": "visual_mtp_weights.safetensors",
2347
+ "model.visual.blocks.7.attn.qkv.bias": "visual_mtp_weights.safetensors",
2348
+ "model.visual.blocks.7.attn.qkv.weight": "visual_mtp_weights.safetensors",
2349
+ "model.visual.blocks.7.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2350
+ "model.visual.blocks.7.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2351
+ "model.visual.blocks.7.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2352
+ "model.visual.blocks.7.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2353
+ "model.visual.blocks.7.norm1.bias": "visual_mtp_weights.safetensors",
2354
+ "model.visual.blocks.7.norm1.weight": "visual_mtp_weights.safetensors",
2355
+ "model.visual.blocks.7.norm2.bias": "visual_mtp_weights.safetensors",
2356
+ "model.visual.blocks.7.norm2.weight": "visual_mtp_weights.safetensors",
2357
+ "model.visual.blocks.8.attn.proj.bias": "visual_mtp_weights.safetensors",
2358
+ "model.visual.blocks.8.attn.proj.weight": "visual_mtp_weights.safetensors",
2359
+ "model.visual.blocks.8.attn.qkv.bias": "visual_mtp_weights.safetensors",
2360
+ "model.visual.blocks.8.attn.qkv.weight": "visual_mtp_weights.safetensors",
2361
+ "model.visual.blocks.8.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2362
+ "model.visual.blocks.8.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2363
+ "model.visual.blocks.8.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2364
+ "model.visual.blocks.8.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2365
+ "model.visual.blocks.8.norm1.bias": "visual_mtp_weights.safetensors",
2366
+ "model.visual.blocks.8.norm1.weight": "visual_mtp_weights.safetensors",
2367
+ "model.visual.blocks.8.norm2.bias": "visual_mtp_weights.safetensors",
2368
+ "model.visual.blocks.8.norm2.weight": "visual_mtp_weights.safetensors",
2369
+ "model.visual.blocks.9.attn.proj.bias": "visual_mtp_weights.safetensors",
2370
+ "model.visual.blocks.9.attn.proj.weight": "visual_mtp_weights.safetensors",
2371
+ "model.visual.blocks.9.attn.qkv.bias": "visual_mtp_weights.safetensors",
2372
+ "model.visual.blocks.9.attn.qkv.weight": "visual_mtp_weights.safetensors",
2373
+ "model.visual.blocks.9.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
2374
+ "model.visual.blocks.9.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
2375
+ "model.visual.blocks.9.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
2376
+ "model.visual.blocks.9.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
2377
+ "model.visual.blocks.9.norm1.bias": "visual_mtp_weights.safetensors",
2378
+ "model.visual.blocks.9.norm1.weight": "visual_mtp_weights.safetensors",
2379
+ "model.visual.blocks.9.norm2.bias": "visual_mtp_weights.safetensors",
2380
+ "model.visual.blocks.9.norm2.weight": "visual_mtp_weights.safetensors",
2381
+ "model.visual.merger.linear_fc1.bias": "visual_mtp_weights.safetensors",
2382
+ "model.visual.merger.linear_fc1.weight": "visual_mtp_weights.safetensors",
2383
+ "model.visual.merger.linear_fc2.bias": "visual_mtp_weights.safetensors",
2384
+ "model.visual.merger.linear_fc2.weight": "visual_mtp_weights.safetensors",
2385
+ "model.visual.merger.norm.bias": "visual_mtp_weights.safetensors",
2386
+ "model.visual.merger.norm.weight": "visual_mtp_weights.safetensors",
2387
+ "model.visual.patch_embed.proj.bias": "visual_mtp_weights.safetensors",
2388
+ "model.visual.patch_embed.proj.weight": "visual_mtp_weights.safetensors",
2389
+ "model.visual.pos_embed.weight": "visual_mtp_weights.safetensors",
2390
+ "mtp.fc.weight": "visual_mtp_weights.safetensors",
2391
+ "mtp.layers.0.input_layernorm.weight": "visual_mtp_weights.safetensors",
2392
+ "mtp.layers.0.mlp.down_proj.weight": "visual_mtp_weights.safetensors",
2393
+ "mtp.layers.0.mlp.gate_proj.weight": "visual_mtp_weights.safetensors",
2394
+ "mtp.layers.0.mlp.up_proj.weight": "visual_mtp_weights.safetensors",
2395
+ "mtp.layers.0.post_attention_layernorm.weight": "visual_mtp_weights.safetensors",
2396
+ "mtp.layers.0.self_attn.k_norm.weight": "visual_mtp_weights.safetensors",
2397
+ "mtp.layers.0.self_attn.k_proj.weight": "visual_mtp_weights.safetensors",
2398
+ "mtp.layers.0.self_attn.o_proj.weight": "visual_mtp_weights.safetensors",
2399
+ "mtp.layers.0.self_attn.q_norm.weight": "visual_mtp_weights.safetensors",
2400
+ "mtp.layers.0.self_attn.q_proj.weight": "visual_mtp_weights.safetensors",
2401
+ "mtp.layers.0.self_attn.v_proj.weight": "visual_mtp_weights.safetensors",
2402
+ "mtp.norm.weight": "visual_mtp_weights.safetensors",
2403
+ "mtp.pre_fc_norm_embedding.weight": "visual_mtp_weights.safetensors",
2404
+ "mtp.pre_fc_norm_hidden.weight": "visual_mtp_weights.safetensors"
2405
  }
2406
  }
preprocessor_config.json CHANGED
@@ -1,21 +1,28 @@
1
  {
2
- "size": {
3
- "longest_edge": 16777216,
4
- "shortest_edge": 65536
5
- },
6
- "patch_size": 16,
7
- "temporal_patch_size": 2,
8
- "merge_size": 2,
9
- "image_mean": [
10
- 0.5,
11
- 0.5,
12
- 0.5
13
- ],
14
- "image_std": [
15
- 0.5,
16
- 0.5,
17
- 0.5
18
- ],
19
- "processor_class": "Qwen3VLProcessor",
20
- "image_processor_type": "Qwen2VLImageProcessorFast"
 
 
 
 
 
 
 
21
  }
 
1
  {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen3VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 16384,
18
+ "min_pixels": 256,
19
+ "patch_size": 16,
20
+ "processor_class": "Qwen3VLProcessor",
21
+ "rescale_factor": 0.00392156862745098,
22
+ "resample": 3,
23
+ "size": {
24
+ "shortest_edge": 384
25
+ },
26
+ "spatial_merge_size": 2,
27
+ "temporal_patch_size": 2
28
  }
processor_config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "Qwen2VLImageProcessorFast",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "merge_size": 2,
20
+ "patch_size": 16,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 16777216,
25
+ "shortest_edge": 65536
26
+ },
27
+ "temporal_patch_size": 2
28
+ },
29
+ "processor_class": "Qwen3VLProcessor",
30
+ "video_processor": {
31
+ "data_format": "channels_first",
32
+ "default_to_square": true,
33
+ "do_convert_rgb": true,
34
+ "do_normalize": true,
35
+ "do_rescale": true,
36
+ "do_resize": true,
37
+ "do_sample_frames": true,
38
+ "fps": 2,
39
+ "image_mean": [
40
+ 0.5,
41
+ 0.5,
42
+ 0.5
43
+ ],
44
+ "image_std": [
45
+ 0.5,
46
+ 0.5,
47
+ 0.5
48
+ ],
49
+ "max_frames": 768,
50
+ "merge_size": 2,
51
+ "min_frames": 4,
52
+ "patch_size": 16,
53
+ "resample": 3,
54
+ "rescale_factor": 0.00392156862745098,
55
+ "return_metadata": false,
56
+ "size": {
57
+ "longest_edge": 25165824,
58
+ "shortest_edge": 4096
59
+ },
60
+ "temporal_patch_size": 2,
61
+ "video_processor_type": "Qwen3VLVideoProcessor"
62
+ }
63
+ }
quant_log.csv DELETED
@@ -1,401 +0,0 @@
1
- layer,module,loss,samples,damp,time
2
- 0,linear_attn.in_proj_qkv,0.0005375139,0.05000,4.042
3
- 0,linear_attn.in_proj_z,0.0003515709,0.05000,3.858
4
- 0,linear_attn.out_proj,0.0000002353,0.05000,4.637
5
- 0,mlp.gate_proj,0.0000034350,0.05000,7.864
6
- 0,mlp.up_proj,0.0000031435,0.05000,7.864
7
- 0,mlp.down_proj,0.0000000338,0.05000,14.103
8
- 1,linear_attn.in_proj_qkv,0.0000247697,0.05000,4.126
9
- 1,linear_attn.in_proj_z,0.0000152088,0.05000,3.910
10
- 1,linear_attn.out_proj,0.0000000828,0.05000,4.622
11
- 1,mlp.gate_proj,0.0000076606,0.05000,7.864
12
- 1,mlp.up_proj,0.0000072644,0.05000,7.887
13
- 1,mlp.down_proj,0.0000000610,0.05000,13.550
14
- 2,linear_attn.in_proj_qkv,0.0000388341,0.05000,3.911
15
- 2,linear_attn.in_proj_z,0.0000234090,0.05000,3.761
16
- 2,linear_attn.out_proj,0.0000001344,0.05000,4.565
17
- 2,mlp.gate_proj,0.0000142704,0.05000,7.359
18
- 2,mlp.up_proj,0.0000133833,0.05000,7.422
19
- 2,mlp.down_proj,0.0000002395,0.05000,13.392
20
- 3,self_attn.v_proj,0.0000288434,0.05000,12.132
21
- 3,self_attn.k_proj,0.0000296158,0.05000,12.167
22
- 3,self_attn.q_proj,0.0003874665,0.05000,12.179
23
- 3,self_attn.o_proj,0.0000001120,0.05000,4.360
24
- 3,mlp.gate_proj,0.0000190095,0.05000,7.313
25
- 3,mlp.up_proj,0.0000182555,0.05000,7.426
26
- 3,mlp.down_proj,0.0000002130,0.05000,13.499
27
- 4,linear_attn.in_proj_qkv,0.0000622010,0.05000,3.933
28
- 4,linear_attn.in_proj_z,0.0000397953,0.05000,3.702
29
- 4,linear_attn.out_proj,0.0000002836,0.05000,4.359
30
- 4,mlp.gate_proj,0.0000268327,0.05000,7.478
31
- 4,mlp.up_proj,0.0000257800,0.05000,7.513
32
- 4,mlp.down_proj,0.0000003252,0.05000,13.431
33
- 5,linear_attn.in_proj_qkv,0.0000803550,0.05000,3.904
34
- 5,linear_attn.in_proj_z,0.0000517575,0.05000,3.733
35
- 5,linear_attn.out_proj,0.0000003582,0.05000,4.399
36
- 5,mlp.gate_proj,0.0000352486,0.05000,7.595
37
- 5,mlp.up_proj,0.0000331223,0.05000,7.608
38
- 5,mlp.down_proj,0.0000005086,0.05000,13.313
39
- 6,linear_attn.in_proj_qkv,0.0001166319,0.05000,3.919
40
- 6,linear_attn.in_proj_z,0.0000688981,0.05000,3.812
41
- 6,linear_attn.out_proj,0.0000005175,0.05000,4.370
42
- 6,mlp.up_proj,0.0000443727,0.05000,7.373
43
- 6,mlp.gate_proj,0.0000479484,0.05000,7.446
44
- 6,mlp.down_proj,0.0000008847,0.05000,13.296
45
- 7,self_attn.v_proj,0.0000276241,0.05000,12.092
46
- 7,self_attn.k_proj,0.0000305602,0.05000,12.200
47
- 7,self_attn.q_proj,0.0003577193,0.05000,12.205
48
- 7,self_attn.o_proj,0.0000003202,0.05000,4.352
49
- 7,mlp.gate_proj,0.0000549339,0.05000,7.342
50
- 7,mlp.up_proj,0.0000510183,0.05000,7.372
51
- 7,mlp.down_proj,0.0000010120,0.05000,13.295
52
- 8,linear_attn.in_proj_qkv,0.0001316477,0.05000,4.026
53
- 8,linear_attn.in_proj_z,0.0000825992,0.05000,3.719
54
- 8,linear_attn.out_proj,0.0000007603,0.05000,4.337
55
- 8,mlp.up_proj,0.0000569455,0.05000,7.281
56
- 8,mlp.gate_proj,0.0000608236,0.05000,7.330
57
- 8,mlp.down_proj,0.0000011534,0.05000,13.460
58
- 9,linear_attn.in_proj_qkv,0.0001313925,0.05000,3.824
59
- 9,linear_attn.in_proj_z,0.0000792323,0.05000,3.677
60
- 9,linear_attn.out_proj,0.0000007958,0.05000,4.393
61
- 9,mlp.gate_proj,0.0000662348,0.05000,7.407
62
- 9,mlp.up_proj,0.0000623162,0.05000,7.431
63
- 9,mlp.down_proj,0.0000013028,0.05000,12.976
64
- 10,linear_attn.in_proj_qkv,0.0001373728,0.05000,3.987
65
- 10,linear_attn.in_proj_z,0.0000804026,0.05000,3.729
66
- 10,linear_attn.out_proj,0.0000009129,0.05000,4.235
67
- 10,mlp.up_proj,0.0000642295,0.05000,7.260
68
- 10,mlp.gate_proj,0.0000682146,0.05000,7.284
69
- 10,mlp.down_proj,0.0000014409,0.05000,13.193
70
- 11,self_attn.q_proj,0.0003283735,0.05000,12.208
71
- 11,self_attn.k_proj,0.0000299146,0.05000,12.295
72
- 11,self_attn.v_proj,0.0000262362,0.05000,12.378
73
- 11,self_attn.o_proj,0.0000005684,0.05000,4.252
74
- 11,mlp.up_proj,0.0000684680,0.05000,7.379
75
- 11,mlp.gate_proj,0.0000719400,0.05000,7.530
76
- 11,mlp.down_proj,0.0000016315,0.05000,13.352
77
- 12,linear_attn.in_proj_qkv,0.0001524303,0.05000,4.002
78
- 12,linear_attn.in_proj_z,0.0000888674,0.05000,3.695
79
- 12,linear_attn.out_proj,0.0000013819,0.05000,4.279
80
- 12,mlp.up_proj,0.0000704509,0.05000,7.016
81
- 12,mlp.gate_proj,0.0000742550,0.05000,7.178
82
- 12,mlp.down_proj,0.0000018874,0.05000,13.696
83
- 13,linear_attn.in_proj_qkv,0.0001589462,0.05000,3.818
84
- 13,linear_attn.in_proj_z,0.0000881903,0.05000,3.892
85
- 13,linear_attn.out_proj,0.0000014786,0.05000,4.371
86
- 13,mlp.gate_proj,0.0000791823,0.05000,7.273
87
- 13,mlp.up_proj,0.0000753247,0.05000,7.327
88
- 13,mlp.down_proj,0.0000021395,0.05000,12.876
89
- 14,linear_attn.in_proj_qkv,0.0001752392,0.05000,3.947
90
- 14,linear_attn.in_proj_z,0.0000999004,0.05000,3.712
91
- 14,linear_attn.out_proj,0.0000017957,0.05000,4.309
92
- 14,mlp.up_proj,0.0000802380,0.05000,7.191
93
- 14,mlp.gate_proj,0.0000841347,0.05000,7.203
94
- 14,mlp.down_proj,0.0000023905,0.05000,12.554
95
- 15,self_attn.k_proj,0.0000265395,0.05000,12.668
96
- 15,self_attn.q_proj,0.0002868342,0.05000,12.781
97
- 15,self_attn.v_proj,0.0000250176,0.05000,12.811
98
- 15,self_attn.o_proj,0.0000008655,0.05000,4.403
99
- 15,mlp.gate_proj,0.0000938290,0.05000,7.279
100
- 15,mlp.up_proj,0.0000904346,0.05000,7.298
101
- 15,mlp.down_proj,0.0000026800,0.05000,14.304
102
- 16,linear_attn.in_proj_qkv,0.0001868244,0.05000,4.146
103
- 16,linear_attn.in_proj_z,0.0001051968,0.05000,4.040
104
- 16,linear_attn.out_proj,0.0000020658,0.05000,4.767
105
- 16,mlp.gate_proj,0.0001032226,0.05000,7.769
106
- 16,mlp.up_proj,0.0000986931,0.05000,7.987
107
- 16,mlp.down_proj,0.0000031492,0.05000,14.390
108
- 17,linear_attn.in_proj_qkv,0.0002118973,0.05000,4.194
109
- 17,linear_attn.in_proj_z,0.0001054912,0.05000,4.160
110
- 17,linear_attn.out_proj,0.0000026375,0.05000,4.668
111
- 17,mlp.gate_proj,0.0001171276,0.05000,7.854
112
- 17,mlp.up_proj,0.0001119881,0.05000,7.933
113
- 17,mlp.down_proj,0.0000041048,0.05000,14.375
114
- 18,linear_attn.in_proj_qkv,0.0002268285,0.05000,4.154
115
- 18,linear_attn.in_proj_z,0.0001231153,0.05000,4.346
116
- 18,linear_attn.out_proj,0.0000030864,0.05000,4.998
117
- 18,mlp.gate_proj,0.0001437140,0.05000,8.275
118
- 18,mlp.up_proj,0.0001326707,0.05000,8.345
119
- 18,mlp.down_proj,0.0000066838,0.05000,15.096
120
- 19,self_attn.k_proj,0.0000351372,0.05000,13.040
121
- 19,self_attn.v_proj,0.0000401050,0.05000,13.295
122
- 19,self_attn.q_proj,0.0003581474,0.05000,13.342
123
- 19,self_attn.o_proj,0.0000033805,0.05000,4.513
124
- 19,mlp.up_proj,0.0001578480,0.05000,7.428
125
- 19,mlp.gate_proj,0.0001676099,0.05000,7.521
126
- 19,mlp.down_proj,0.0000081858,0.05000,13.584
127
- 20,linear_attn.in_proj_qkv,0.0003857003,0.05000,4.143
128
- 20,linear_attn.in_proj_z,0.0002126070,0.05000,3.825
129
- 20,linear_attn.out_proj,0.0000042739,0.05000,4.365
130
- 20,mlp.gate_proj,0.0001995256,0.05000,7.309
131
- 20,mlp.up_proj,0.0001841119,0.05000,7.360
132
- 20,mlp.down_proj,0.0000094405,0.05000,13.068
133
- 21,linear_attn.in_proj_qkv,0.0004770911,0.05000,4.062
134
- 21,linear_attn.in_proj_z,0.0002983586,0.05000,3.838
135
- 21,linear_attn.out_proj,0.0000051978,0.05000,4.865
136
- 21,mlp.gate_proj,0.0002137620,0.05000,8.588
137
- 21,mlp.up_proj,0.0001987904,0.05000,8.667
138
- 21,mlp.down_proj,0.0000097025,0.05000,14.692
139
- 22,linear_attn.in_proj_qkv,0.0004051315,0.05000,4.037
140
- 22,linear_attn.in_proj_z,0.0002439546,0.05000,3.862
141
- 22,linear_attn.out_proj,0.0000042765,0.05000,4.544
142
- 22,mlp.gate_proj,0.0002479232,0.05000,7.690
143
- 22,mlp.up_proj,0.0002207545,0.05000,7.696
144
- 22,mlp.down_proj,0.0000105950,0.05000,13.732
145
- 23,self_attn.k_proj,0.0000463970,0.05000,12.129
146
- 23,self_attn.q_proj,0.0004317670,0.05000,12.218
147
- 23,self_attn.v_proj,0.0000474314,0.05000,12.276
148
- 23,self_attn.o_proj,0.0000039161,0.05000,4.454
149
- 23,mlp.gate_proj,0.0002434690,0.05000,4.061
150
- 23,mlp.up_proj,0.0002234220,0.05000,4.068
151
- 23,mlp.down_proj,0.0000098514,0.05000,6.163
152
- 24,linear_attn.in_proj_qkv,0.0003746832,0.05000,1.630
153
- 24,linear_attn.in_proj_z,0.0002085239,0.05000,1.935
154
- 24,linear_attn.out_proj,0.0000055183,0.05000,1.881
155
- 24,mlp.up_proj,0.0002386453,0.05000,2.835
156
- 24,mlp.gate_proj,0.0002611436,0.05000,2.846
157
- 24,mlp.down_proj,0.0000110752,0.05000,6.217
158
- 25,linear_attn.in_proj_qkv,0.0003768635,0.05000,1.658
159
- 25,linear_attn.in_proj_z,0.0001953509,0.05000,1.896
160
- 25,linear_attn.out_proj,0.0000056296,0.05000,1.866
161
- 25,mlp.up_proj,0.0002393584,0.05000,2.809
162
- 25,mlp.gate_proj,0.0002474422,0.05000,2.837
163
- 25,mlp.down_proj,0.0000109050,0.05000,5.939
164
- 26,linear_attn.in_proj_qkv,0.0003443491,0.05000,1.629
165
- 26,linear_attn.in_proj_z,0.0001705994,0.05000,1.761
166
- 26,linear_attn.out_proj,0.0000062545,0.05000,1.777
167
- 26,mlp.up_proj,0.0002370522,0.05000,2.612
168
- 26,mlp.gate_proj,0.0002310437,0.05000,2.628
169
- 26,mlp.down_proj,0.0000114066,0.05000,14.123
170
- 27,self_attn.v_proj,0.0000367863,0.05000,13.241
171
- 27,self_attn.q_proj,0.0003905871,0.05000,13.273
172
- 27,self_attn.k_proj,0.0000425683,0.05000,13.340
173
- 27,self_attn.o_proj,0.0000076879,0.05000,4.871
174
- 27,mlp.up_proj,0.0002445872,0.05000,8.179
175
- 27,mlp.gate_proj,0.0002310220,0.05000,8.230
176
- 27,mlp.down_proj,0.0000117096,0.05000,14.592
177
- 28,linear_attn.in_proj_qkv,0.0003290252,0.05000,4.262
178
- 28,linear_attn.in_proj_z,0.0001524443,0.05000,4.042
179
- 28,linear_attn.out_proj,0.0000077834,0.05000,4.709
180
- 28,mlp.up_proj,0.0002347492,0.05000,8.001
181
- 28,mlp.gate_proj,0.0002179738,0.05000,8.055
182
- 28,mlp.down_proj,0.0000116015,0.05000,14.366
183
- 29,linear_attn.in_proj_qkv,0.0003474303,0.05000,4.223
184
- 29,linear_attn.in_proj_z,0.0001582735,0.05000,3.978
185
- 29,linear_attn.out_proj,0.0000067616,0.05000,4.703
186
- 29,mlp.up_proj,0.0002339276,0.05000,7.818
187
- 29,mlp.gate_proj,0.0002158054,0.05000,7.938
188
- 29,mlp.down_proj,0.0000115375,0.05000,14.244
189
- 30,linear_attn.in_proj_qkv,0.0003500992,0.05000,4.178
190
- 30,linear_attn.in_proj_z,0.0001629830,0.05000,3.946
191
- 30,linear_attn.out_proj,0.0000088357,0.05000,4.402
192
- 30,mlp.up_proj,0.0002370281,0.05000,7.381
193
- 30,mlp.gate_proj,0.0002136254,0.05000,7.390
194
- 30,mlp.down_proj,0.0000118369,0.05000,13.323
195
- 31,self_attn.q_proj,0.0003585615,0.05000,12.437
196
- 31,self_attn.v_proj,0.0000437309,0.05000,12.497
197
- 31,self_attn.k_proj,0.0000397583,0.05000,12.511
198
- 31,self_attn.o_proj,0.0000089746,0.05000,4.354
199
- 31,mlp.up_proj,0.0002537108,0.05000,7.347
200
- 31,mlp.gate_proj,0.0002248938,0.05000,7.439
201
- 31,mlp.down_proj,0.0000123573,0.05000,13.377
202
- 32,linear_attn.in_proj_qkv,0.0003709294,0.05000,3.939
203
- 32,linear_attn.in_proj_z,0.0001609685,0.05000,3.698
204
- 32,linear_attn.out_proj,0.0000087306,0.05000,4.326
205
- 32,mlp.up_proj,0.0002673114,0.05000,7.415
206
- 32,mlp.gate_proj,0.0002366684,0.05000,7.490
207
- 32,mlp.down_proj,0.0000130803,0.05000,13.673
208
- 33,linear_attn.in_proj_qkv,0.0004365302,0.05000,4.096
209
- 33,linear_attn.in_proj_z,0.0001785530,0.05000,3.702
210
- 33,linear_attn.out_proj,0.0000097492,0.05000,4.057
211
- 33,mlp.gate_proj,0.0002480511,0.05000,7.279
212
- 33,mlp.up_proj,0.0002804281,0.05000,7.308
213
- 33,mlp.down_proj,0.0000136580,0.05000,13.334
214
- 34,linear_attn.in_proj_qkv,0.0004258669,0.05000,3.831
215
- 34,linear_attn.in_proj_z,0.0001928501,0.05000,3.721
216
- 34,linear_attn.out_proj,0.0000102707,0.05000,4.442
217
- 34,mlp.up_proj,0.0003019740,0.05000,7.321
218
- 34,mlp.gate_proj,0.0002771344,0.05000,7.366
219
- 34,mlp.down_proj,0.0000191384,0.05000,13.280
220
- 35,self_attn.v_proj,0.0000684588,0.05000,11.710
221
- 35,self_attn.k_proj,0.0000486297,0.05000,11.843
222
- 35,self_attn.q_proj,0.0004456763,0.05000,11.909
223
- 35,self_attn.o_proj,0.0000145637,0.05000,4.427
224
- 35,mlp.gate_proj,0.0002873185,0.05000,7.143
225
- 35,mlp.up_proj,0.0003080965,0.05000,7.252
226
- 35,mlp.down_proj,0.0000221059,0.05000,13.145
227
- 36,linear_attn.in_proj_qkv,0.0006038611,0.05000,3.909
228
- 36,linear_attn.in_proj_z,0.0003008038,0.05000,3.728
229
- 36,linear_attn.out_proj,0.0000098270,0.05000,4.396
230
- 36,mlp.up_proj,0.0003041932,0.05000,7.369
231
- 36,mlp.gate_proj,0.0003187759,0.05000,7.375
232
- 36,mlp.down_proj,0.0000210562,0.05000,13.447
233
- 37,linear_attn.in_proj_qkv,0.0006587347,0.05000,3.827
234
- 37,linear_attn.in_proj_z,0.0003705355,0.05000,3.655
235
- 37,linear_attn.out_proj,0.0000103849,0.05000,4.382
236
- 37,mlp.up_proj,0.0003054113,0.05000,7.238
237
- 37,mlp.gate_proj,0.0003235339,0.05000,7.413
238
- 37,mlp.down_proj,0.0000197475,0.05000,13.285
239
- 38,linear_attn.in_proj_qkv,0.0005866436,0.05000,3.831
240
- 38,linear_attn.in_proj_z,0.0003399069,0.05000,3.728
241
- 38,linear_attn.out_proj,0.0000070955,0.05000,4.388
242
- 38,mlp.up_proj,0.0003502550,0.05000,7.342
243
- 38,mlp.gate_proj,0.0004350511,0.05000,7.477
244
- 38,mlp.down_proj,0.0000197556,0.05000,13.336
245
- 39,self_attn.v_proj,0.0000690332,0.05000,11.787
246
- 39,self_attn.q_proj,0.0005133493,0.05000,11.855
247
- 39,self_attn.k_proj,0.0000577507,0.05000,11.959
248
- 39,self_attn.o_proj,0.0000081428,0.05000,4.310
249
- 39,mlp.up_proj,0.0003293339,0.05000,7.327
250
- 39,mlp.gate_proj,0.0003795339,0.05000,7.377
251
- 39,mlp.down_proj,0.0000167872,0.05000,13.394
252
- 40,linear_attn.in_proj_qkv,0.0005513974,0.05000,3.892
253
- 40,linear_attn.in_proj_z,0.0003037444,0.05000,3.709
254
- 40,linear_attn.out_proj,0.0000087935,0.05000,4.374
255
- 40,mlp.gate_proj,0.0004035835,0.05000,7.251
256
- 40,mlp.up_proj,0.0003409987,0.05000,7.300
257
- 40,mlp.down_proj,0.0000168670,0.05000,13.312
258
- 41,linear_attn.in_proj_qkv,0.0004881215,0.05000,3.890
259
- 41,linear_attn.in_proj_z,0.0002671910,0.05000,3.763
260
- 41,linear_attn.out_proj,0.0000075667,0.05000,4.354
261
- 41,mlp.gate_proj,0.0003435877,0.05000,7.206
262
- 41,mlp.up_proj,0.0003245952,0.05000,7.311
263
- 41,mlp.down_proj,0.0000161043,0.05000,13.384
264
- 42,linear_attn.in_proj_qkv,0.0004569151,0.05000,3.859
265
- 42,linear_attn.in_proj_z,0.0002390392,0.05000,3.677
266
- 42,linear_attn.out_proj,0.0000090846,0.05000,4.343
267
- 42,mlp.gate_proj,0.0003117448,0.05000,7.278
268
- 42,mlp.up_proj,0.0003180992,0.05000,7.370
269
- 42,mlp.down_proj,0.0000171017,0.05000,13.210
270
- 43,self_attn.k_proj,0.0000566166,0.05000,11.748
271
- 43,self_attn.q_proj,0.0004798718,0.05000,11.838
272
- 43,self_attn.v_proj,0.0000652786,0.05000,11.869
273
- 43,self_attn.o_proj,0.0000125740,0.05000,4.373
274
- 43,mlp.gate_proj,0.0003006564,0.05000,7.244
275
- 43,mlp.up_proj,0.0003207947,0.05000,7.330
276
- 43,mlp.down_proj,0.0000184054,0.05000,13.209
277
- 44,linear_attn.in_proj_qkv,0.0004242027,0.05000,3.969
278
- 44,linear_attn.in_proj_z,0.0002068353,0.05000,3.729
279
- 44,linear_attn.out_proj,0.0000130579,0.05000,4.383
280
- 44,mlp.gate_proj,0.0002931194,0.05000,7.492
281
- 44,mlp.up_proj,0.0003164854,0.05000,7.504
282
- 44,mlp.down_proj,0.0000208067,0.05000,13.460
283
- 45,linear_attn.in_proj_qkv,0.0004198022,0.05000,3.889
284
- 45,linear_attn.in_proj_z,0.0002044268,0.05000,3.712
285
- 45,linear_attn.out_proj,0.0000100431,0.05000,4.342
286
- 45,mlp.up_proj,0.0003182327,0.05000,7.222
287
- 45,mlp.gate_proj,0.0002921933,0.05000,7.309
288
- 45,mlp.down_proj,0.0000206568,0.05000,13.297
289
- 46,linear_attn.in_proj_qkv,0.0004432378,0.05000,3.811
290
- 46,linear_attn.in_proj_z,0.0002188176,0.05000,3.578
291
- 46,linear_attn.out_proj,0.0000159958,0.05000,4.362
292
- 46,mlp.up_proj,0.0003269873,0.05000,7.209
293
- 46,mlp.gate_proj,0.0002911491,0.05000,7.326
294
- 46,mlp.down_proj,0.0000221925,0.05000,13.306
295
- 47,self_attn.q_proj,0.0004898258,0.05000,11.976
296
- 47,self_attn.k_proj,0.0000527123,0.05000,12.000
297
- 47,self_attn.v_proj,0.0000802256,0.05000,12.073
298
- 47,self_attn.o_proj,0.0000113472,0.05000,4.347
299
- 47,mlp.up_proj,0.0003570259,0.05000,7.197
300
- 47,mlp.gate_proj,0.0003197138,0.05000,7.244
301
- 47,mlp.down_proj,0.0000265334,0.05000,13.358
302
- 48,linear_attn.in_proj_qkv,0.0004879047,0.05000,4.045
303
- 48,linear_attn.in_proj_z,0.0002313808,0.05000,3.735
304
- 48,linear_attn.out_proj,0.0000175865,0.05000,4.593
305
- 48,mlp.up_proj,0.0003831256,0.05000,7.891
306
- 48,mlp.gate_proj,0.0003445416,0.05000,7.934
307
- 48,mlp.down_proj,0.0000335000,0.05000,14.167
308
- 49,linear_attn.in_proj_qkv,0.0005885937,0.05000,4.504
309
- 49,linear_attn.in_proj_z,0.0002457014,0.05000,4.097
310
- 49,linear_attn.out_proj,0.0000237044,0.05000,4.843
311
- 49,mlp.gate_proj,0.0003840848,0.05000,7.974
312
- 49,mlp.up_proj,0.0004163043,0.05000,8.228
313
- 49,mlp.down_proj,0.0000451689,0.05000,14.632
314
- 50,linear_attn.in_proj_qkv,0.0006162443,0.05000,4.071
315
- 50,linear_attn.in_proj_z,0.0002804692,0.05000,3.924
316
- 50,linear_attn.out_proj,0.0000349245,0.05000,4.456
317
- 50,mlp.up_proj,0.0004862685,0.05000,7.537
318
- 50,mlp.gate_proj,0.0004747827,0.05000,7.751
319
- 50,mlp.down_proj,0.0000826267,0.05000,13.794
320
- 51,self_attn.v_proj,0.0001619573,0.05000,13.278
321
- 51,self_attn.q_proj,0.0006273309,0.05000,13.370
322
- 51,self_attn.k_proj,0.0000692513,0.05000,13.392
323
- 51,self_attn.o_proj,0.0000327376,0.05000,4.272
324
- 51,mlp.up_proj,0.0005253989,0.05000,6.498
325
- 51,mlp.gate_proj,0.0004956579,0.05000,6.636
326
- 51,mlp.down_proj,0.0001026185,0.05000,13.159
327
- 52,linear_attn.in_proj_qkv,0.0009266171,0.05000,3.825
328
- 52,linear_attn.in_proj_z,0.0004137491,0.05000,3.629
329
- 52,linear_attn.out_proj,0.0000446695,0.05000,4.312
330
- 52,mlp.up_proj,0.0005780998,0.05000,7.308
331
- 52,mlp.gate_proj,0.0006152220,0.05000,7.324
332
- 52,mlp.down_proj,0.0001118123,0.05000,13.300
333
- 53,linear_attn.in_proj_qkv,0.0009211465,0.05000,3.892
334
- 53,linear_attn.in_proj_z,0.0004431443,0.05000,3.709
335
- 53,linear_attn.out_proj,0.0000514739,0.05000,4.370
336
- 53,mlp.gate_proj,0.0007024802,0.05000,7.420
337
- 53,mlp.up_proj,0.0006290088,0.05000,7.470
338
- 53,mlp.down_proj,0.0001273530,0.05000,13.885
339
- 54,linear_attn.in_proj_qkv,0.0009073259,0.05000,4.184
340
- 54,linear_attn.in_proj_z,0.0004801113,0.05000,3.902
341
- 54,linear_attn.out_proj,0.0000492737,0.05000,4.573
342
- 54,mlp.gate_proj,0.0009067172,0.05000,7.734
343
- 54,mlp.up_proj,0.0007649405,0.05000,7.785
344
- 54,mlp.down_proj,0.0001912621,0.05000,13.749
345
- 55,self_attn.v_proj,0.0002226114,0.05000,11.937
346
- 55,self_attn.k_proj,0.0001067982,0.05000,12.172
347
- 55,self_attn.q_proj,0.0008758757,0.05000,12.200
348
- 55,self_attn.o_proj,0.0000413251,0.05000,4.451
349
- 55,mlp.gate_proj,0.0008657420,0.05000,7.637
350
- 55,mlp.up_proj,0.0007707832,0.05000,7.694
351
- 55,mlp.down_proj,0.0001647693,0.05000,13.720
352
- 56,linear_attn.in_proj_qkv,0.0010866812,0.05000,1.976
353
- 56,linear_attn.in_proj_z,0.0005330881,0.05000,1.723
354
- 56,linear_attn.out_proj,0.0000856800,0.05000,1.750
355
- 56,mlp.gate_proj,0.0009597083,0.05000,2.876
356
- 56,mlp.up_proj,0.0008358503,0.05000,2.896
357
- 56,mlp.down_proj,0.0001710205,0.05000,6.148
358
- 57,linear_attn.in_proj_qkv,0.0011609036,0.05000,1.614
359
- 57,linear_attn.in_proj_z,0.0005589580,0.05000,1.844
360
- 57,linear_attn.out_proj,0.0000814013,0.05000,1.834
361
- 57,mlp.gate_proj,0.0011157731,0.05000,2.722
362
- 57,mlp.up_proj,0.0009594462,0.05000,2.735
363
- 57,mlp.down_proj,0.0001745235,0.05000,6.145
364
- 58,linear_attn.in_proj_qkv,0.0011074618,0.05000,1.627
365
- 58,linear_attn.in_proj_z,0.0005856057,0.05000,1.876
366
- 58,linear_attn.out_proj,0.0000750041,0.05000,1.704
367
- 58,mlp.up_proj,0.0011001359,0.05000,2.567
368
- 58,mlp.gate_proj,0.0012845446,0.05000,2.602
369
- 58,mlp.down_proj,0.0002041286,0.05000,5.796
370
- 59,self_attn.v_proj,0.0004657917,0.05000,11.021
371
- 59,self_attn.k_proj,0.0001350715,0.05000,11.043
372
- 59,self_attn.q_proj,0.0010552907,0.05000,11.053
373
- 59,self_attn.o_proj,0.0001216906,0.05000,4.675
374
- 59,mlp.up_proj,0.0011041036,0.05000,8.476
375
- 59,mlp.gate_proj,0.0012555530,0.05000,8.495
376
- 59,mlp.down_proj,0.0002444422,0.05000,14.802
377
- 60,linear_attn.in_proj_qkv,0.0013573468,0.05000,4.353
378
- 60,linear_attn.in_proj_z,0.0006349659,0.05000,4.117
379
- 60,linear_attn.out_proj,0.0001707409,0.05000,4.842
380
- 60,mlp.up_proj,0.0011231104,0.05000,8.090
381
- 60,mlp.gate_proj,0.0012666935,0.05000,8.147
382
- 60,mlp.down_proj,0.0002962877,0.05000,14.318
383
- 61,linear_attn.in_proj_qkv,0.0009809255,0.05000,4.231
384
- 61,linear_attn.in_proj_z,0.0005509510,0.05000,4.022
385
- 61,linear_attn.out_proj,0.0001616595,0.05000,4.698
386
- 61,mlp.gate_proj,0.0013469172,0.05000,8.006
387
- 61,mlp.up_proj,0.0011976919,0.05000,8.052
388
- 61,mlp.down_proj,0.0003628120,0.05000,14.145
389
- 62,linear_attn.in_proj_qkv,0.0010928032,0.05000,4.179
390
- 62,linear_attn.in_proj_z,0.0005702188,0.05000,3.975
391
- 62,linear_attn.out_proj,0.0002907203,0.05000,4.675
392
- 62,mlp.gate_proj,0.0012338856,0.05000,7.959
393
- 62,mlp.up_proj,0.0011025960,0.05000,7.974
394
- 62,mlp.down_proj,0.0005055889,0.05000,14.051
395
- 63,self_attn.q_proj,0.0009252999,0.05000,11.630
396
- 63,self_attn.v_proj,0.0003772193,0.05000,11.671
397
- 63,self_attn.k_proj,0.0001241435,0.05000,11.675
398
- 63,self_attn.o_proj,0.0003580288,0.05000,4.633
399
- 63,mlp.up_proj,0.0009046681,0.05000,7.865
400
- 63,mlp.gate_proj,0.0010507936,0.05000,7.868
401
- 63,mlp.down_proj,0.0010416286,0.05000,14.094
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
quantize_config.json CHANGED
@@ -25,7 +25,7 @@
25
  },
26
  "offload_to_disk": false,
27
  "offload_to_disk_path": null,
28
- "pack_impl": "cuda",
29
  "mock_quantization": false,
30
  "gc_mode": "interval",
31
  "wait_for_submodule_finalizers": false,
 
25
  },
26
  "offload_to_disk": false,
27
  "offload_to_disk_path": null,
28
+ "pack_impl": "cpu",
29
  "mock_quantization": false,
30
  "gc_mode": "interval",
31
  "wait_for_submodule_finalizers": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a0316e38d81a31965b770393461dcc03612ac06e2733f8e1cc440d36a2ef189
3
- size 19989442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
3
+ size 19989343
tokenizer_config.json CHANGED
@@ -9,7 +9,7 @@
9
  "eos_token": "<|im_end|>",
10
  "errors": "replace",
11
  "image_token": "<|image_pad|>",
12
- "is_local": true,
13
  "model_max_length": 262144,
14
  "model_specific_special_tokens": {
15
  "audio_bos_token": "<|audio_start|>",
@@ -25,9 +25,10 @@
25
  "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
  "processor_class": "Qwen3VLProcessor",
27
  "split_special_tokens": false,
28
- "tokenizer_class": "TokenizersBackend",
29
  "unk_token": null,
30
  "video_token": "<|video_pad|>",
31
  "vision_bos_token": "<|vision_start|>",
32
- "vision_eos_token": "<|vision_end|>"
33
- }
 
 
9
  "eos_token": "<|im_end|>",
10
  "errors": "replace",
11
  "image_token": "<|image_pad|>",
12
+ "is_local": false,
13
  "model_max_length": 262144,
14
  "model_specific_special_tokens": {
15
  "audio_bos_token": "<|audio_start|>",
 
25
  "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
  "processor_class": "Qwen3VLProcessor",
27
  "split_special_tokens": false,
28
+ "tokenizer_class": "TokenizersBackendFast",
29
  "unk_token": null,
30
  "video_token": "<|video_pad|>",
31
  "vision_bos_token": "<|vision_start|>",
32
+ "vision_eos_token": "<|vision_end|>",
33
+ "_commit_hash": null
34
+ }
mtp.safetensors → visual_mtp_weights.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd2bb454f23e11b7960f6f0fea12a410677e91745789b34c5dd6c10459bfb7a2
3
- size 849400392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79c445390601445b28d78cff728d2cce1630310c10a5cfff8ebe08cdc503eabb
3
+ size 1770897616