Upgrade to C4 calibration (1024 samples) for better general-purpose performance
Browse files- README.md +1 -3
- config.json +47 -48
- generation_config.json +0 -8
- model-00001-of-00005.safetensors +2 -2
- model-00002-of-00005.safetensors +2 -2
- model-00003-of-00005.safetensors +2 -2
- model-00004-of-00005.safetensors +2 -2
- model-00005-of-00005.safetensors +2 -2
- model.safetensors.index.json +349 -349
- preprocessor_config.json +26 -19
- processor_config.json +63 -0
- quant_log.csv +0 -401
- quantize_config.json +1 -1
- tokenizer.json +2 -2
- tokenizer_config.json +5 -4
- mtp.safetensors → visual_mtp_weights.safetensors +2 -2
README.md
CHANGED
|
@@ -18,8 +18,6 @@ pipeline_tag: image-text-to-text
|
|
| 18 |
|
| 19 |
# Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled-GPTQ-int4
|
| 20 |
|
| 21 |
-
> ⚠️ **Caution**: This quantization is not well tested. Use at your own risk and please report any issues.
|
| 22 |
-
|
| 23 |
This is a **GPTQ INT4 quantized** version of [Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled](https://huggingface.co/Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled).
|
| 24 |
|
| 25 |
Please refer to the original model card for details on the model architecture, training data, and capabilities.
|
|
@@ -30,7 +28,7 @@ Please refer to the original model card for details on the model architecture, t
|
|
| 30 |
|
| 31 |
- **Method**: GPTQ (4-bit INT4, W4A16)
|
| 32 |
- **Group Size**: 128
|
| 33 |
-
- **Calibration**:
|
| 34 |
- **Vision Encoder**: Preserved (not quantized)
|
| 35 |
- **MTP Module**: Preserved (not quantized)
|
| 36 |
|
|
|
|
| 18 |
|
| 19 |
# Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled-GPTQ-int4
|
| 20 |
|
|
|
|
|
|
|
| 21 |
This is a **GPTQ INT4 quantized** version of [Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled](https://huggingface.co/Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled).
|
| 22 |
|
| 23 |
Please refer to the original model card for details on the model architecture, training data, and capabilities.
|
|
|
|
| 28 |
|
| 29 |
- **Method**: GPTQ (4-bit INT4, W4A16)
|
| 30 |
- **Group Size**: 128
|
| 31 |
+
- **Calibration**: 1024 samples from C4 dataset
|
| 32 |
- **Vision Encoder**: Preserved (not quantized)
|
| 33 |
- **MTP Module**: Preserved (not quantized)
|
| 34 |
|
config.json
CHANGED
|
@@ -3,60 +3,18 @@
|
|
| 3 |
"Qwen3_5ForConditionalGeneration"
|
| 4 |
],
|
| 5 |
"bos_token_id": null,
|
| 6 |
-
"
|
| 7 |
"eos_token_id": 248046,
|
| 8 |
"image_token_id": 248056,
|
| 9 |
"model_name": "qwen/Qwen3.5-27B",
|
| 10 |
"model_type": "qwen3_5",
|
| 11 |
"pad_token_id": 248044,
|
| 12 |
-
"quantization_config": {
|
| 13 |
-
"bits": 4,
|
| 14 |
-
"checkpoint_format": "gptq",
|
| 15 |
-
"desc_act": false,
|
| 16 |
-
"format": "gptq",
|
| 17 |
-
"group_size": 128,
|
| 18 |
-
"lm_head": false,
|
| 19 |
-
"meta": {
|
| 20 |
-
"act_group_aware": true,
|
| 21 |
-
"auto_forward_data_parallel": true,
|
| 22 |
-
"damp_auto_increment": 0.01,
|
| 23 |
-
"damp_percent": 0.05,
|
| 24 |
-
"failsafe": {
|
| 25 |
-
"smooth": null,
|
| 26 |
-
"strategy": "rtn",
|
| 27 |
-
"threshold": "0.5%"
|
| 28 |
-
},
|
| 29 |
-
"gc_mode": "interval",
|
| 30 |
-
"gptaq": null,
|
| 31 |
-
"hessian": {
|
| 32 |
-
"chunk_bytes": null,
|
| 33 |
-
"chunk_size": null,
|
| 34 |
-
"staging_dtype": "float32"
|
| 35 |
-
},
|
| 36 |
-
"mock_quantization": false,
|
| 37 |
-
"mse": 0.0,
|
| 38 |
-
"offload_to_disk": false,
|
| 39 |
-
"offload_to_disk_path": null,
|
| 40 |
-
"pack_impl": "cuda",
|
| 41 |
-
"quantizer": [
|
| 42 |
-
"gptqmodel:5.8.0"
|
| 43 |
-
],
|
| 44 |
-
"static_groups": false,
|
| 45 |
-
"true_sequential": true,
|
| 46 |
-
"uri": "https://github.com/modelcloud/gptqmodel",
|
| 47 |
-
"vram_strategy": "exclusive",
|
| 48 |
-
"wait_for_submodule_finalizers": false
|
| 49 |
-
},
|
| 50 |
-
"pack_dtype": "int32",
|
| 51 |
-
"quant_method": "gptq",
|
| 52 |
-
"sym": true
|
| 53 |
-
},
|
| 54 |
"text_config": {
|
| 55 |
"attention_bias": false,
|
| 56 |
"attention_dropout": 0.0,
|
| 57 |
"attn_output_gate": true,
|
| 58 |
"bos_token_id": null,
|
| 59 |
-
"
|
| 60 |
"eos_token_id": 248044,
|
| 61 |
"full_attention_interval": 4,
|
| 62 |
"head_dim": 256,
|
|
@@ -163,14 +121,13 @@
|
|
| 163 |
"vocab_size": 248320
|
| 164 |
},
|
| 165 |
"tie_word_embeddings": false,
|
| 166 |
-
"transformers_version": "5.3.0",
|
| 167 |
"unsloth_version": "2026.3.3",
|
| 168 |
"use_cache": false,
|
| 169 |
"video_token_id": 248057,
|
| 170 |
"vision_config": {
|
| 171 |
"deepstack_visual_indexes": [],
|
| 172 |
"depth": 27,
|
| 173 |
-
"
|
| 174 |
"hidden_act": "gelu_pytorch_tanh",
|
| 175 |
"hidden_size": 1152,
|
| 176 |
"in_channels": 3,
|
|
@@ -185,5 +142,47 @@
|
|
| 185 |
"temporal_patch_size": 2
|
| 186 |
},
|
| 187 |
"vision_end_token_id": 248054,
|
| 188 |
-
"vision_start_token_id": 248053
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"Qwen3_5ForConditionalGeneration"
|
| 4 |
],
|
| 5 |
"bos_token_id": null,
|
| 6 |
+
"torch_dtype": "bfloat16",
|
| 7 |
"eos_token_id": 248046,
|
| 8 |
"image_token_id": 248056,
|
| 9 |
"model_name": "qwen/Qwen3.5-27B",
|
| 10 |
"model_type": "qwen3_5",
|
| 11 |
"pad_token_id": 248044,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"text_config": {
|
| 13 |
"attention_bias": false,
|
| 14 |
"attention_dropout": 0.0,
|
| 15 |
"attn_output_gate": true,
|
| 16 |
"bos_token_id": null,
|
| 17 |
+
"torch_dtype": "bfloat16",
|
| 18 |
"eos_token_id": 248044,
|
| 19 |
"full_attention_interval": 4,
|
| 20 |
"head_dim": 256,
|
|
|
|
| 121 |
"vocab_size": 248320
|
| 122 |
},
|
| 123 |
"tie_word_embeddings": false,
|
|
|
|
| 124 |
"unsloth_version": "2026.3.3",
|
| 125 |
"use_cache": false,
|
| 126 |
"video_token_id": 248057,
|
| 127 |
"vision_config": {
|
| 128 |
"deepstack_visual_indexes": [],
|
| 129 |
"depth": 27,
|
| 130 |
+
"torch_dtype": "bfloat16",
|
| 131 |
"hidden_act": "gelu_pytorch_tanh",
|
| 132 |
"hidden_size": 1152,
|
| 133 |
"in_channels": 3,
|
|
|
|
| 142 |
"temporal_patch_size": 2
|
| 143 |
},
|
| 144 |
"vision_end_token_id": 248054,
|
| 145 |
+
"vision_start_token_id": 248053,
|
| 146 |
+
"quantization_config": {
|
| 147 |
+
"bits": 4,
|
| 148 |
+
"checkpoint_format": "gptq",
|
| 149 |
+
"desc_act": false,
|
| 150 |
+
"format": "gptq",
|
| 151 |
+
"group_size": 128,
|
| 152 |
+
"lm_head": false,
|
| 153 |
+
"meta": {
|
| 154 |
+
"act_group_aware": true,
|
| 155 |
+
"auto_forward_data_parallel": true,
|
| 156 |
+
"damp_auto_increment": 0.01,
|
| 157 |
+
"damp_percent": 0.05,
|
| 158 |
+
"failsafe": {
|
| 159 |
+
"smooth": null,
|
| 160 |
+
"strategy": "rtn",
|
| 161 |
+
"threshold": "0.5%"
|
| 162 |
+
},
|
| 163 |
+
"gc_mode": "interval",
|
| 164 |
+
"gptaq": null,
|
| 165 |
+
"hessian": {
|
| 166 |
+
"chunk_bytes": null,
|
| 167 |
+
"chunk_size": null,
|
| 168 |
+
"staging_dtype": "float32"
|
| 169 |
+
},
|
| 170 |
+
"mock_quantization": false,
|
| 171 |
+
"mse": 0.0,
|
| 172 |
+
"offload_to_disk": false,
|
| 173 |
+
"offload_to_disk_path": null,
|
| 174 |
+
"pack_impl": "cpu",
|
| 175 |
+
"quantizer": [
|
| 176 |
+
"gptqmodel:5.8.0"
|
| 177 |
+
],
|
| 178 |
+
"static_groups": false,
|
| 179 |
+
"true_sequential": true,
|
| 180 |
+
"uri": "https://github.com/modelcloud/gptqmodel",
|
| 181 |
+
"vram_strategy": "exclusive",
|
| 182 |
+
"wait_for_submodule_finalizers": false
|
| 183 |
+
},
|
| 184 |
+
"pack_dtype": "int32",
|
| 185 |
+
"quant_method": "gptq",
|
| 186 |
+
"sym": true
|
| 187 |
+
}
|
| 188 |
+
}
|
generation_config.json
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_from_model_config": true,
|
| 3 |
-
"do_sample": true,
|
| 4 |
-
"eos_token_id": 248046,
|
| 5 |
-
"pad_token_id": 248044,
|
| 6 |
-
"transformers_version": "5.3.0",
|
| 7 |
-
"use_cache": false
|
| 8 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model-00001-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1bf293c0532d32964a6fc226d6536dd32c890bb5c313a4b3d35aadab152f2a4
|
| 3 |
+
size 2595332032
|
model-00002-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:226401a7b406a545d70c92b6182c44bd9e1eafc8366a05f3f8341ef3f02b668c
|
| 3 |
+
size 4278206312
|
model-00003-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddf8aae8dad61d9993b67c6e2599e686479a78a05239d98a46bbacfc205a8f5e
|
| 3 |
+
size 4258595176
|
model-00004-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7687a67d0c57f3ac10ff60a29cf6adac7f0f2a060209ad8a3841c7cdf466104
|
| 3 |
+
size 4284981504
|
model-00005-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f26c195735990ff0d26de40f7c09735e12200d400f2094601185677fb2f4804e
|
| 3 |
+
size 2371438184
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00002-of-00005.safetensors",
|
|
@@ -2054,353 +2054,353 @@
|
|
| 2054 |
"model.language_model.layers.9.mlp.up_proj.scales": "model-00003-of-00005.safetensors",
|
| 2055 |
"model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
|
| 2056 |
"model.language_model.norm.weight": "model-00001-of-00005.safetensors",
|
| 2057 |
-
"model.visual.blocks.0.attn.proj.bias": "
|
| 2058 |
-
"model.visual.blocks.0.attn.proj.weight": "
|
| 2059 |
-
"model.visual.blocks.0.attn.qkv.bias": "
|
| 2060 |
-
"model.visual.blocks.0.attn.qkv.weight": "
|
| 2061 |
-
"model.visual.blocks.0.mlp.linear_fc1.bias": "
|
| 2062 |
-
"model.visual.blocks.0.mlp.linear_fc1.weight": "
|
| 2063 |
-
"model.visual.blocks.0.mlp.linear_fc2.bias": "
|
| 2064 |
-
"model.visual.blocks.0.mlp.linear_fc2.weight": "
|
| 2065 |
-
"model.visual.blocks.0.norm1.bias": "
|
| 2066 |
-
"model.visual.blocks.0.norm1.weight": "
|
| 2067 |
-
"model.visual.blocks.0.norm2.bias": "
|
| 2068 |
-
"model.visual.blocks.0.norm2.weight": "
|
| 2069 |
-
"model.visual.blocks.1.attn.proj.bias": "
|
| 2070 |
-
"model.visual.blocks.1.attn.proj.weight": "
|
| 2071 |
-
"model.visual.blocks.1.attn.qkv.bias": "
|
| 2072 |
-
"model.visual.blocks.1.attn.qkv.weight": "
|
| 2073 |
-
"model.visual.blocks.1.mlp.linear_fc1.bias": "
|
| 2074 |
-
"model.visual.blocks.1.mlp.linear_fc1.weight": "
|
| 2075 |
-
"model.visual.blocks.1.mlp.linear_fc2.bias": "
|
| 2076 |
-
"model.visual.blocks.1.mlp.linear_fc2.weight": "
|
| 2077 |
-
"model.visual.blocks.1.norm1.bias": "
|
| 2078 |
-
"model.visual.blocks.1.norm1.weight": "
|
| 2079 |
-
"model.visual.blocks.1.norm2.bias": "
|
| 2080 |
-
"model.visual.blocks.1.norm2.weight": "
|
| 2081 |
-
"model.visual.blocks.10.attn.proj.bias": "
|
| 2082 |
-
"model.visual.blocks.10.attn.proj.weight": "
|
| 2083 |
-
"model.visual.blocks.10.attn.qkv.bias": "
|
| 2084 |
-
"model.visual.blocks.10.attn.qkv.weight": "
|
| 2085 |
-
"model.visual.blocks.10.mlp.linear_fc1.bias": "
|
| 2086 |
-
"model.visual.blocks.10.mlp.linear_fc1.weight": "
|
| 2087 |
-
"model.visual.blocks.10.mlp.linear_fc2.bias": "
|
| 2088 |
-
"model.visual.blocks.10.mlp.linear_fc2.weight": "
|
| 2089 |
-
"model.visual.blocks.10.norm1.bias": "
|
| 2090 |
-
"model.visual.blocks.10.norm1.weight": "
|
| 2091 |
-
"model.visual.blocks.10.norm2.bias": "
|
| 2092 |
-
"model.visual.blocks.10.norm2.weight": "
|
| 2093 |
-
"model.visual.blocks.11.attn.proj.bias": "
|
| 2094 |
-
"model.visual.blocks.11.attn.proj.weight": "
|
| 2095 |
-
"model.visual.blocks.11.attn.qkv.bias": "
|
| 2096 |
-
"model.visual.blocks.11.attn.qkv.weight": "
|
| 2097 |
-
"model.visual.blocks.11.mlp.linear_fc1.bias": "
|
| 2098 |
-
"model.visual.blocks.11.mlp.linear_fc1.weight": "
|
| 2099 |
-
"model.visual.blocks.11.mlp.linear_fc2.bias": "
|
| 2100 |
-
"model.visual.blocks.11.mlp.linear_fc2.weight": "
|
| 2101 |
-
"model.visual.blocks.11.norm1.bias": "
|
| 2102 |
-
"model.visual.blocks.11.norm1.weight": "
|
| 2103 |
-
"model.visual.blocks.11.norm2.bias": "
|
| 2104 |
-
"model.visual.blocks.11.norm2.weight": "
|
| 2105 |
-
"model.visual.blocks.12.attn.proj.bias": "
|
| 2106 |
-
"model.visual.blocks.12.attn.proj.weight": "
|
| 2107 |
-
"model.visual.blocks.12.attn.qkv.bias": "
|
| 2108 |
-
"model.visual.blocks.12.attn.qkv.weight": "
|
| 2109 |
-
"model.visual.blocks.12.mlp.linear_fc1.bias": "
|
| 2110 |
-
"model.visual.blocks.12.mlp.linear_fc1.weight": "
|
| 2111 |
-
"model.visual.blocks.12.mlp.linear_fc2.bias": "
|
| 2112 |
-
"model.visual.blocks.12.mlp.linear_fc2.weight": "
|
| 2113 |
-
"model.visual.blocks.12.norm1.bias": "
|
| 2114 |
-
"model.visual.blocks.12.norm1.weight": "
|
| 2115 |
-
"model.visual.blocks.12.norm2.bias": "
|
| 2116 |
-
"model.visual.blocks.12.norm2.weight": "
|
| 2117 |
-
"model.visual.blocks.13.attn.proj.bias": "
|
| 2118 |
-
"model.visual.blocks.13.attn.proj.weight": "
|
| 2119 |
-
"model.visual.blocks.13.attn.qkv.bias": "
|
| 2120 |
-
"model.visual.blocks.13.attn.qkv.weight": "
|
| 2121 |
-
"model.visual.blocks.13.mlp.linear_fc1.bias": "
|
| 2122 |
-
"model.visual.blocks.13.mlp.linear_fc1.weight": "
|
| 2123 |
-
"model.visual.blocks.13.mlp.linear_fc2.bias": "
|
| 2124 |
-
"model.visual.blocks.13.mlp.linear_fc2.weight": "
|
| 2125 |
-
"model.visual.blocks.13.norm1.bias": "
|
| 2126 |
-
"model.visual.blocks.13.norm1.weight": "
|
| 2127 |
-
"model.visual.blocks.13.norm2.bias": "
|
| 2128 |
-
"model.visual.blocks.13.norm2.weight": "
|
| 2129 |
-
"model.visual.blocks.14.attn.proj.bias": "
|
| 2130 |
-
"model.visual.blocks.14.attn.proj.weight": "
|
| 2131 |
-
"model.visual.blocks.14.attn.qkv.bias": "
|
| 2132 |
-
"model.visual.blocks.14.attn.qkv.weight": "
|
| 2133 |
-
"model.visual.blocks.14.mlp.linear_fc1.bias": "
|
| 2134 |
-
"model.visual.blocks.14.mlp.linear_fc1.weight": "
|
| 2135 |
-
"model.visual.blocks.14.mlp.linear_fc2.bias": "
|
| 2136 |
-
"model.visual.blocks.14.mlp.linear_fc2.weight": "
|
| 2137 |
-
"model.visual.blocks.14.norm1.bias": "
|
| 2138 |
-
"model.visual.blocks.14.norm1.weight": "
|
| 2139 |
-
"model.visual.blocks.14.norm2.bias": "
|
| 2140 |
-
"model.visual.blocks.14.norm2.weight": "
|
| 2141 |
-
"model.visual.blocks.15.attn.proj.bias": "
|
| 2142 |
-
"model.visual.blocks.15.attn.proj.weight": "
|
| 2143 |
-
"model.visual.blocks.15.attn.qkv.bias": "
|
| 2144 |
-
"model.visual.blocks.15.attn.qkv.weight": "
|
| 2145 |
-
"model.visual.blocks.15.mlp.linear_fc1.bias": "
|
| 2146 |
-
"model.visual.blocks.15.mlp.linear_fc1.weight": "
|
| 2147 |
-
"model.visual.blocks.15.mlp.linear_fc2.bias": "
|
| 2148 |
-
"model.visual.blocks.15.mlp.linear_fc2.weight": "
|
| 2149 |
-
"model.visual.blocks.15.norm1.bias": "
|
| 2150 |
-
"model.visual.blocks.15.norm1.weight": "
|
| 2151 |
-
"model.visual.blocks.15.norm2.bias": "
|
| 2152 |
-
"model.visual.blocks.15.norm2.weight": "
|
| 2153 |
-
"model.visual.blocks.16.attn.proj.bias": "
|
| 2154 |
-
"model.visual.blocks.16.attn.proj.weight": "
|
| 2155 |
-
"model.visual.blocks.16.attn.qkv.bias": "
|
| 2156 |
-
"model.visual.blocks.16.attn.qkv.weight": "
|
| 2157 |
-
"model.visual.blocks.16.mlp.linear_fc1.bias": "
|
| 2158 |
-
"model.visual.blocks.16.mlp.linear_fc1.weight": "
|
| 2159 |
-
"model.visual.blocks.16.mlp.linear_fc2.bias": "
|
| 2160 |
-
"model.visual.blocks.16.mlp.linear_fc2.weight": "
|
| 2161 |
-
"model.visual.blocks.16.norm1.bias": "
|
| 2162 |
-
"model.visual.blocks.16.norm1.weight": "
|
| 2163 |
-
"model.visual.blocks.16.norm2.bias": "
|
| 2164 |
-
"model.visual.blocks.16.norm2.weight": "
|
| 2165 |
-
"model.visual.blocks.17.attn.proj.bias": "
|
| 2166 |
-
"model.visual.blocks.17.attn.proj.weight": "
|
| 2167 |
-
"model.visual.blocks.17.attn.qkv.bias": "
|
| 2168 |
-
"model.visual.blocks.17.attn.qkv.weight": "
|
| 2169 |
-
"model.visual.blocks.17.mlp.linear_fc1.bias": "
|
| 2170 |
-
"model.visual.blocks.17.mlp.linear_fc1.weight": "
|
| 2171 |
-
"model.visual.blocks.17.mlp.linear_fc2.bias": "
|
| 2172 |
-
"model.visual.blocks.17.mlp.linear_fc2.weight": "
|
| 2173 |
-
"model.visual.blocks.17.norm1.bias": "
|
| 2174 |
-
"model.visual.blocks.17.norm1.weight": "
|
| 2175 |
-
"model.visual.blocks.17.norm2.bias": "
|
| 2176 |
-
"model.visual.blocks.17.norm2.weight": "
|
| 2177 |
-
"model.visual.blocks.18.attn.proj.bias": "
|
| 2178 |
-
"model.visual.blocks.18.attn.proj.weight": "
|
| 2179 |
-
"model.visual.blocks.18.attn.qkv.bias": "
|
| 2180 |
-
"model.visual.blocks.18.attn.qkv.weight": "
|
| 2181 |
-
"model.visual.blocks.18.mlp.linear_fc1.bias": "
|
| 2182 |
-
"model.visual.blocks.18.mlp.linear_fc1.weight": "
|
| 2183 |
-
"model.visual.blocks.18.mlp.linear_fc2.bias": "
|
| 2184 |
-
"model.visual.blocks.18.mlp.linear_fc2.weight": "
|
| 2185 |
-
"model.visual.blocks.18.norm1.bias": "
|
| 2186 |
-
"model.visual.blocks.18.norm1.weight": "
|
| 2187 |
-
"model.visual.blocks.18.norm2.bias": "
|
| 2188 |
-
"model.visual.blocks.18.norm2.weight": "
|
| 2189 |
-
"model.visual.blocks.19.attn.proj.bias": "
|
| 2190 |
-
"model.visual.blocks.19.attn.proj.weight": "
|
| 2191 |
-
"model.visual.blocks.19.attn.qkv.bias": "
|
| 2192 |
-
"model.visual.blocks.19.attn.qkv.weight": "
|
| 2193 |
-
"model.visual.blocks.19.mlp.linear_fc1.bias": "
|
| 2194 |
-
"model.visual.blocks.19.mlp.linear_fc1.weight": "
|
| 2195 |
-
"model.visual.blocks.19.mlp.linear_fc2.bias": "
|
| 2196 |
-
"model.visual.blocks.19.mlp.linear_fc2.weight": "
|
| 2197 |
-
"model.visual.blocks.19.norm1.bias": "
|
| 2198 |
-
"model.visual.blocks.19.norm1.weight": "
|
| 2199 |
-
"model.visual.blocks.19.norm2.bias": "
|
| 2200 |
-
"model.visual.blocks.19.norm2.weight": "
|
| 2201 |
-
"model.visual.blocks.2.attn.proj.bias": "
|
| 2202 |
-
"model.visual.blocks.2.attn.proj.weight": "
|
| 2203 |
-
"model.visual.blocks.2.attn.qkv.bias": "
|
| 2204 |
-
"model.visual.blocks.2.attn.qkv.weight": "
|
| 2205 |
-
"model.visual.blocks.2.mlp.linear_fc1.bias": "
|
| 2206 |
-
"model.visual.blocks.2.mlp.linear_fc1.weight": "
|
| 2207 |
-
"model.visual.blocks.2.mlp.linear_fc2.bias": "
|
| 2208 |
-
"model.visual.blocks.2.mlp.linear_fc2.weight": "
|
| 2209 |
-
"model.visual.blocks.2.norm1.bias": "
|
| 2210 |
-
"model.visual.blocks.2.norm1.weight": "
|
| 2211 |
-
"model.visual.blocks.2.norm2.bias": "
|
| 2212 |
-
"model.visual.blocks.2.norm2.weight": "
|
| 2213 |
-
"model.visual.blocks.20.attn.proj.bias": "
|
| 2214 |
-
"model.visual.blocks.20.attn.proj.weight": "
|
| 2215 |
-
"model.visual.blocks.20.attn.qkv.bias": "
|
| 2216 |
-
"model.visual.blocks.20.attn.qkv.weight": "
|
| 2217 |
-
"model.visual.blocks.20.mlp.linear_fc1.bias": "
|
| 2218 |
-
"model.visual.blocks.20.mlp.linear_fc1.weight": "
|
| 2219 |
-
"model.visual.blocks.20.mlp.linear_fc2.bias": "
|
| 2220 |
-
"model.visual.blocks.20.mlp.linear_fc2.weight": "
|
| 2221 |
-
"model.visual.blocks.20.norm1.bias": "
|
| 2222 |
-
"model.visual.blocks.20.norm1.weight": "
|
| 2223 |
-
"model.visual.blocks.20.norm2.bias": "
|
| 2224 |
-
"model.visual.blocks.20.norm2.weight": "
|
| 2225 |
-
"model.visual.blocks.21.attn.proj.bias": "
|
| 2226 |
-
"model.visual.blocks.21.attn.proj.weight": "
|
| 2227 |
-
"model.visual.blocks.21.attn.qkv.bias": "
|
| 2228 |
-
"model.visual.blocks.21.attn.qkv.weight": "
|
| 2229 |
-
"model.visual.blocks.21.mlp.linear_fc1.bias": "
|
| 2230 |
-
"model.visual.blocks.21.mlp.linear_fc1.weight": "
|
| 2231 |
-
"model.visual.blocks.21.mlp.linear_fc2.bias": "
|
| 2232 |
-
"model.visual.blocks.21.mlp.linear_fc2.weight": "
|
| 2233 |
-
"model.visual.blocks.21.norm1.bias": "
|
| 2234 |
-
"model.visual.blocks.21.norm1.weight": "
|
| 2235 |
-
"model.visual.blocks.21.norm2.bias": "
|
| 2236 |
-
"model.visual.blocks.21.norm2.weight": "
|
| 2237 |
-
"model.visual.blocks.22.attn.proj.bias": "
|
| 2238 |
-
"model.visual.blocks.22.attn.proj.weight": "
|
| 2239 |
-
"model.visual.blocks.22.attn.qkv.bias": "
|
| 2240 |
-
"model.visual.blocks.22.attn.qkv.weight": "
|
| 2241 |
-
"model.visual.blocks.22.mlp.linear_fc1.bias": "
|
| 2242 |
-
"model.visual.blocks.22.mlp.linear_fc1.weight": "
|
| 2243 |
-
"model.visual.blocks.22.mlp.linear_fc2.bias": "
|
| 2244 |
-
"model.visual.blocks.22.mlp.linear_fc2.weight": "
|
| 2245 |
-
"model.visual.blocks.22.norm1.bias": "
|
| 2246 |
-
"model.visual.blocks.22.norm1.weight": "
|
| 2247 |
-
"model.visual.blocks.22.norm2.bias": "
|
| 2248 |
-
"model.visual.blocks.22.norm2.weight": "
|
| 2249 |
-
"model.visual.blocks.23.attn.proj.bias": "
|
| 2250 |
-
"model.visual.blocks.23.attn.proj.weight": "
|
| 2251 |
-
"model.visual.blocks.23.attn.qkv.bias": "
|
| 2252 |
-
"model.visual.blocks.23.attn.qkv.weight": "
|
| 2253 |
-
"model.visual.blocks.23.mlp.linear_fc1.bias": "
|
| 2254 |
-
"model.visual.blocks.23.mlp.linear_fc1.weight": "
|
| 2255 |
-
"model.visual.blocks.23.mlp.linear_fc2.bias": "
|
| 2256 |
-
"model.visual.blocks.23.mlp.linear_fc2.weight": "
|
| 2257 |
-
"model.visual.blocks.23.norm1.bias": "
|
| 2258 |
-
"model.visual.blocks.23.norm1.weight": "
|
| 2259 |
-
"model.visual.blocks.23.norm2.bias": "
|
| 2260 |
-
"model.visual.blocks.23.norm2.weight": "
|
| 2261 |
-
"model.visual.blocks.24.attn.proj.bias": "
|
| 2262 |
-
"model.visual.blocks.24.attn.proj.weight": "
|
| 2263 |
-
"model.visual.blocks.24.attn.qkv.bias": "
|
| 2264 |
-
"model.visual.blocks.24.attn.qkv.weight": "
|
| 2265 |
-
"model.visual.blocks.24.mlp.linear_fc1.bias": "
|
| 2266 |
-
"model.visual.blocks.24.mlp.linear_fc1.weight": "
|
| 2267 |
-
"model.visual.blocks.24.mlp.linear_fc2.bias": "
|
| 2268 |
-
"model.visual.blocks.24.mlp.linear_fc2.weight": "
|
| 2269 |
-
"model.visual.blocks.24.norm1.bias": "
|
| 2270 |
-
"model.visual.blocks.24.norm1.weight": "
|
| 2271 |
-
"model.visual.blocks.24.norm2.bias": "
|
| 2272 |
-
"model.visual.blocks.24.norm2.weight": "
|
| 2273 |
-
"model.visual.blocks.25.attn.proj.bias": "
|
| 2274 |
-
"model.visual.blocks.25.attn.proj.weight": "
|
| 2275 |
-
"model.visual.blocks.25.attn.qkv.bias": "
|
| 2276 |
-
"model.visual.blocks.25.attn.qkv.weight": "
|
| 2277 |
-
"model.visual.blocks.25.mlp.linear_fc1.bias": "
|
| 2278 |
-
"model.visual.blocks.25.mlp.linear_fc1.weight": "
|
| 2279 |
-
"model.visual.blocks.25.mlp.linear_fc2.bias": "
|
| 2280 |
-
"model.visual.blocks.25.mlp.linear_fc2.weight": "
|
| 2281 |
-
"model.visual.blocks.25.norm1.bias": "
|
| 2282 |
-
"model.visual.blocks.25.norm1.weight": "
|
| 2283 |
-
"model.visual.blocks.25.norm2.bias": "
|
| 2284 |
-
"model.visual.blocks.25.norm2.weight": "
|
| 2285 |
-
"model.visual.blocks.26.attn.proj.bias": "
|
| 2286 |
-
"model.visual.blocks.26.attn.proj.weight": "
|
| 2287 |
-
"model.visual.blocks.26.attn.qkv.bias": "
|
| 2288 |
-
"model.visual.blocks.26.attn.qkv.weight": "
|
| 2289 |
-
"model.visual.blocks.26.mlp.linear_fc1.bias": "
|
| 2290 |
-
"model.visual.blocks.26.mlp.linear_fc1.weight": "
|
| 2291 |
-
"model.visual.blocks.26.mlp.linear_fc2.bias": "
|
| 2292 |
-
"model.visual.blocks.26.mlp.linear_fc2.weight": "
|
| 2293 |
-
"model.visual.blocks.26.norm1.bias": "
|
| 2294 |
-
"model.visual.blocks.26.norm1.weight": "
|
| 2295 |
-
"model.visual.blocks.26.norm2.bias": "
|
| 2296 |
-
"model.visual.blocks.26.norm2.weight": "
|
| 2297 |
-
"model.visual.blocks.3.attn.proj.bias": "
|
| 2298 |
-
"model.visual.blocks.3.attn.proj.weight": "
|
| 2299 |
-
"model.visual.blocks.3.attn.qkv.bias": "
|
| 2300 |
-
"model.visual.blocks.3.attn.qkv.weight": "
|
| 2301 |
-
"model.visual.blocks.3.mlp.linear_fc1.bias": "
|
| 2302 |
-
"model.visual.blocks.3.mlp.linear_fc1.weight": "
|
| 2303 |
-
"model.visual.blocks.3.mlp.linear_fc2.bias": "
|
| 2304 |
-
"model.visual.blocks.3.mlp.linear_fc2.weight": "
|
| 2305 |
-
"model.visual.blocks.3.norm1.bias": "
|
| 2306 |
-
"model.visual.blocks.3.norm1.weight": "
|
| 2307 |
-
"model.visual.blocks.3.norm2.bias": "
|
| 2308 |
-
"model.visual.blocks.3.norm2.weight": "
|
| 2309 |
-
"model.visual.blocks.4.attn.proj.bias": "
|
| 2310 |
-
"model.visual.blocks.4.attn.proj.weight": "
|
| 2311 |
-
"model.visual.blocks.4.attn.qkv.bias": "
|
| 2312 |
-
"model.visual.blocks.4.attn.qkv.weight": "
|
| 2313 |
-
"model.visual.blocks.4.mlp.linear_fc1.bias": "
|
| 2314 |
-
"model.visual.blocks.4.mlp.linear_fc1.weight": "
|
| 2315 |
-
"model.visual.blocks.4.mlp.linear_fc2.bias": "
|
| 2316 |
-
"model.visual.blocks.4.mlp.linear_fc2.weight": "
|
| 2317 |
-
"model.visual.blocks.4.norm1.bias": "
|
| 2318 |
-
"model.visual.blocks.4.norm1.weight": "
|
| 2319 |
-
"model.visual.blocks.4.norm2.bias": "
|
| 2320 |
-
"model.visual.blocks.4.norm2.weight": "
|
| 2321 |
-
"model.visual.blocks.5.attn.proj.bias": "
|
| 2322 |
-
"model.visual.blocks.5.attn.proj.weight": "
|
| 2323 |
-
"model.visual.blocks.5.attn.qkv.bias": "
|
| 2324 |
-
"model.visual.blocks.5.attn.qkv.weight": "
|
| 2325 |
-
"model.visual.blocks.5.mlp.linear_fc1.bias": "
|
| 2326 |
-
"model.visual.blocks.5.mlp.linear_fc1.weight": "
|
| 2327 |
-
"model.visual.blocks.5.mlp.linear_fc2.bias": "
|
| 2328 |
-
"model.visual.blocks.5.mlp.linear_fc2.weight": "
|
| 2329 |
-
"model.visual.blocks.5.norm1.bias": "
|
| 2330 |
-
"model.visual.blocks.5.norm1.weight": "
|
| 2331 |
-
"model.visual.blocks.5.norm2.bias": "
|
| 2332 |
-
"model.visual.blocks.5.norm2.weight": "
|
| 2333 |
-
"model.visual.blocks.6.attn.proj.bias": "
|
| 2334 |
-
"model.visual.blocks.6.attn.proj.weight": "
|
| 2335 |
-
"model.visual.blocks.6.attn.qkv.bias": "
|
| 2336 |
-
"model.visual.blocks.6.attn.qkv.weight": "
|
| 2337 |
-
"model.visual.blocks.6.mlp.linear_fc1.bias": "
|
| 2338 |
-
"model.visual.blocks.6.mlp.linear_fc1.weight": "
|
| 2339 |
-
"model.visual.blocks.6.mlp.linear_fc2.bias": "
|
| 2340 |
-
"model.visual.blocks.6.mlp.linear_fc2.weight": "
|
| 2341 |
-
"model.visual.blocks.6.norm1.bias": "
|
| 2342 |
-
"model.visual.blocks.6.norm1.weight": "
|
| 2343 |
-
"model.visual.blocks.6.norm2.bias": "
|
| 2344 |
-
"model.visual.blocks.6.norm2.weight": "
|
| 2345 |
-
"model.visual.blocks.7.attn.proj.bias": "
|
| 2346 |
-
"model.visual.blocks.7.attn.proj.weight": "
|
| 2347 |
-
"model.visual.blocks.7.attn.qkv.bias": "
|
| 2348 |
-
"model.visual.blocks.7.attn.qkv.weight": "
|
| 2349 |
-
"model.visual.blocks.7.mlp.linear_fc1.bias": "
|
| 2350 |
-
"model.visual.blocks.7.mlp.linear_fc1.weight": "
|
| 2351 |
-
"model.visual.blocks.7.mlp.linear_fc2.bias": "
|
| 2352 |
-
"model.visual.blocks.7.mlp.linear_fc2.weight": "
|
| 2353 |
-
"model.visual.blocks.7.norm1.bias": "
|
| 2354 |
-
"model.visual.blocks.7.norm1.weight": "
|
| 2355 |
-
"model.visual.blocks.7.norm2.bias": "
|
| 2356 |
-
"model.visual.blocks.7.norm2.weight": "
|
| 2357 |
-
"model.visual.blocks.8.attn.proj.bias": "
|
| 2358 |
-
"model.visual.blocks.8.attn.proj.weight": "
|
| 2359 |
-
"model.visual.blocks.8.attn.qkv.bias": "
|
| 2360 |
-
"model.visual.blocks.8.attn.qkv.weight": "
|
| 2361 |
-
"model.visual.blocks.8.mlp.linear_fc1.bias": "
|
| 2362 |
-
"model.visual.blocks.8.mlp.linear_fc1.weight": "
|
| 2363 |
-
"model.visual.blocks.8.mlp.linear_fc2.bias": "
|
| 2364 |
-
"model.visual.blocks.8.mlp.linear_fc2.weight": "
|
| 2365 |
-
"model.visual.blocks.8.norm1.bias": "
|
| 2366 |
-
"model.visual.blocks.8.norm1.weight": "
|
| 2367 |
-
"model.visual.blocks.8.norm2.bias": "
|
| 2368 |
-
"model.visual.blocks.8.norm2.weight": "
|
| 2369 |
-
"model.visual.blocks.9.attn.proj.bias": "
|
| 2370 |
-
"model.visual.blocks.9.attn.proj.weight": "
|
| 2371 |
-
"model.visual.blocks.9.attn.qkv.bias": "
|
| 2372 |
-
"model.visual.blocks.9.attn.qkv.weight": "
|
| 2373 |
-
"model.visual.blocks.9.mlp.linear_fc1.bias": "
|
| 2374 |
-
"model.visual.blocks.9.mlp.linear_fc1.weight": "
|
| 2375 |
-
"model.visual.blocks.9.mlp.linear_fc2.bias": "
|
| 2376 |
-
"model.visual.blocks.9.mlp.linear_fc2.weight": "
|
| 2377 |
-
"model.visual.blocks.9.norm1.bias": "
|
| 2378 |
-
"model.visual.blocks.9.norm1.weight": "
|
| 2379 |
-
"model.visual.blocks.9.norm2.bias": "
|
| 2380 |
-
"model.visual.blocks.9.norm2.weight": "
|
| 2381 |
-
"model.visual.merger.linear_fc1.bias": "
|
| 2382 |
-
"model.visual.merger.linear_fc1.weight": "
|
| 2383 |
-
"model.visual.merger.linear_fc2.bias": "
|
| 2384 |
-
"model.visual.merger.linear_fc2.weight": "
|
| 2385 |
-
"model.visual.merger.norm.bias": "
|
| 2386 |
-
"model.visual.merger.norm.weight": "
|
| 2387 |
-
"model.visual.patch_embed.proj.bias": "
|
| 2388 |
-
"model.visual.patch_embed.proj.weight": "
|
| 2389 |
-
"model.visual.pos_embed.weight": "
|
| 2390 |
-
"mtp.fc.weight": "
|
| 2391 |
-
"mtp.layers.0.input_layernorm.weight": "
|
| 2392 |
-
"mtp.layers.0.mlp.down_proj.weight": "
|
| 2393 |
-
"mtp.layers.0.mlp.gate_proj.weight": "
|
| 2394 |
-
"mtp.layers.0.mlp.up_proj.weight": "
|
| 2395 |
-
"mtp.layers.0.post_attention_layernorm.weight": "
|
| 2396 |
-
"mtp.layers.0.self_attn.k_norm.weight": "
|
| 2397 |
-
"mtp.layers.0.self_attn.k_proj.weight": "
|
| 2398 |
-
"mtp.layers.0.self_attn.o_proj.weight": "
|
| 2399 |
-
"mtp.layers.0.self_attn.q_norm.weight": "
|
| 2400 |
-
"mtp.layers.0.self_attn.q_proj.weight": "
|
| 2401 |
-
"mtp.layers.0.self_attn.v_proj.weight": "
|
| 2402 |
-
"mtp.norm.weight": "
|
| 2403 |
-
"mtp.pre_fc_norm_embedding.weight": "
|
| 2404 |
-
"mtp.pre_fc_norm_hidden.weight": "
|
| 2405 |
}
|
| 2406 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 19559420239
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00002-of-00005.safetensors",
|
|
|
|
| 2054 |
"model.language_model.layers.9.mlp.up_proj.scales": "model-00003-of-00005.safetensors",
|
| 2055 |
"model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
|
| 2056 |
"model.language_model.norm.weight": "model-00001-of-00005.safetensors",
|
| 2057 |
+
"model.visual.blocks.0.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2058 |
+
"model.visual.blocks.0.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2059 |
+
"model.visual.blocks.0.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2060 |
+
"model.visual.blocks.0.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2061 |
+
"model.visual.blocks.0.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2062 |
+
"model.visual.blocks.0.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2063 |
+
"model.visual.blocks.0.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2064 |
+
"model.visual.blocks.0.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2065 |
+
"model.visual.blocks.0.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2066 |
+
"model.visual.blocks.0.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2067 |
+
"model.visual.blocks.0.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2068 |
+
"model.visual.blocks.0.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2069 |
+
"model.visual.blocks.1.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2070 |
+
"model.visual.blocks.1.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2071 |
+
"model.visual.blocks.1.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2072 |
+
"model.visual.blocks.1.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2073 |
+
"model.visual.blocks.1.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2074 |
+
"model.visual.blocks.1.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2075 |
+
"model.visual.blocks.1.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2076 |
+
"model.visual.blocks.1.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2077 |
+
"model.visual.blocks.1.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2078 |
+
"model.visual.blocks.1.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2079 |
+
"model.visual.blocks.1.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2080 |
+
"model.visual.blocks.1.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2081 |
+
"model.visual.blocks.10.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2082 |
+
"model.visual.blocks.10.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2083 |
+
"model.visual.blocks.10.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2084 |
+
"model.visual.blocks.10.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2085 |
+
"model.visual.blocks.10.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2086 |
+
"model.visual.blocks.10.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2087 |
+
"model.visual.blocks.10.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2088 |
+
"model.visual.blocks.10.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2089 |
+
"model.visual.blocks.10.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2090 |
+
"model.visual.blocks.10.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2091 |
+
"model.visual.blocks.10.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2092 |
+
"model.visual.blocks.10.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2093 |
+
"model.visual.blocks.11.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2094 |
+
"model.visual.blocks.11.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2095 |
+
"model.visual.blocks.11.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2096 |
+
"model.visual.blocks.11.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2097 |
+
"model.visual.blocks.11.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2098 |
+
"model.visual.blocks.11.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2099 |
+
"model.visual.blocks.11.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2100 |
+
"model.visual.blocks.11.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2101 |
+
"model.visual.blocks.11.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2102 |
+
"model.visual.blocks.11.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2103 |
+
"model.visual.blocks.11.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2104 |
+
"model.visual.blocks.11.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2105 |
+
"model.visual.blocks.12.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2106 |
+
"model.visual.blocks.12.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2107 |
+
"model.visual.blocks.12.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2108 |
+
"model.visual.blocks.12.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2109 |
+
"model.visual.blocks.12.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2110 |
+
"model.visual.blocks.12.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2111 |
+
"model.visual.blocks.12.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2112 |
+
"model.visual.blocks.12.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2113 |
+
"model.visual.blocks.12.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2114 |
+
"model.visual.blocks.12.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2115 |
+
"model.visual.blocks.12.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2116 |
+
"model.visual.blocks.12.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2117 |
+
"model.visual.blocks.13.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2118 |
+
"model.visual.blocks.13.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2119 |
+
"model.visual.blocks.13.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2120 |
+
"model.visual.blocks.13.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2121 |
+
"model.visual.blocks.13.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2122 |
+
"model.visual.blocks.13.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2123 |
+
"model.visual.blocks.13.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2124 |
+
"model.visual.blocks.13.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2125 |
+
"model.visual.blocks.13.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2126 |
+
"model.visual.blocks.13.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2127 |
+
"model.visual.blocks.13.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2128 |
+
"model.visual.blocks.13.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2129 |
+
"model.visual.blocks.14.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2130 |
+
"model.visual.blocks.14.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2131 |
+
"model.visual.blocks.14.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2132 |
+
"model.visual.blocks.14.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2133 |
+
"model.visual.blocks.14.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2134 |
+
"model.visual.blocks.14.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2135 |
+
"model.visual.blocks.14.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2136 |
+
"model.visual.blocks.14.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2137 |
+
"model.visual.blocks.14.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2138 |
+
"model.visual.blocks.14.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2139 |
+
"model.visual.blocks.14.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2140 |
+
"model.visual.blocks.14.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2141 |
+
"model.visual.blocks.15.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2142 |
+
"model.visual.blocks.15.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2143 |
+
"model.visual.blocks.15.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2144 |
+
"model.visual.blocks.15.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2145 |
+
"model.visual.blocks.15.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2146 |
+
"model.visual.blocks.15.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2147 |
+
"model.visual.blocks.15.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2148 |
+
"model.visual.blocks.15.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2149 |
+
"model.visual.blocks.15.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2150 |
+
"model.visual.blocks.15.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2151 |
+
"model.visual.blocks.15.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2152 |
+
"model.visual.blocks.15.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2153 |
+
"model.visual.blocks.16.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2154 |
+
"model.visual.blocks.16.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2155 |
+
"model.visual.blocks.16.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2156 |
+
"model.visual.blocks.16.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2157 |
+
"model.visual.blocks.16.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2158 |
+
"model.visual.blocks.16.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2159 |
+
"model.visual.blocks.16.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2160 |
+
"model.visual.blocks.16.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2161 |
+
"model.visual.blocks.16.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2162 |
+
"model.visual.blocks.16.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2163 |
+
"model.visual.blocks.16.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2164 |
+
"model.visual.blocks.16.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2165 |
+
"model.visual.blocks.17.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2166 |
+
"model.visual.blocks.17.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2167 |
+
"model.visual.blocks.17.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2168 |
+
"model.visual.blocks.17.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2169 |
+
"model.visual.blocks.17.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2170 |
+
"model.visual.blocks.17.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2171 |
+
"model.visual.blocks.17.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2172 |
+
"model.visual.blocks.17.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2173 |
+
"model.visual.blocks.17.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2174 |
+
"model.visual.blocks.17.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2175 |
+
"model.visual.blocks.17.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2176 |
+
"model.visual.blocks.17.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2177 |
+
"model.visual.blocks.18.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2178 |
+
"model.visual.blocks.18.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2179 |
+
"model.visual.blocks.18.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2180 |
+
"model.visual.blocks.18.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2181 |
+
"model.visual.blocks.18.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2182 |
+
"model.visual.blocks.18.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2183 |
+
"model.visual.blocks.18.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2184 |
+
"model.visual.blocks.18.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2185 |
+
"model.visual.blocks.18.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2186 |
+
"model.visual.blocks.18.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2187 |
+
"model.visual.blocks.18.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2188 |
+
"model.visual.blocks.18.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2189 |
+
"model.visual.blocks.19.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2190 |
+
"model.visual.blocks.19.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2191 |
+
"model.visual.blocks.19.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2192 |
+
"model.visual.blocks.19.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2193 |
+
"model.visual.blocks.19.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2194 |
+
"model.visual.blocks.19.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2195 |
+
"model.visual.blocks.19.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2196 |
+
"model.visual.blocks.19.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2197 |
+
"model.visual.blocks.19.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2198 |
+
"model.visual.blocks.19.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2199 |
+
"model.visual.blocks.19.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2200 |
+
"model.visual.blocks.19.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2201 |
+
"model.visual.blocks.2.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2202 |
+
"model.visual.blocks.2.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2203 |
+
"model.visual.blocks.2.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2204 |
+
"model.visual.blocks.2.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2205 |
+
"model.visual.blocks.2.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2206 |
+
"model.visual.blocks.2.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2207 |
+
"model.visual.blocks.2.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2208 |
+
"model.visual.blocks.2.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2209 |
+
"model.visual.blocks.2.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2210 |
+
"model.visual.blocks.2.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2211 |
+
"model.visual.blocks.2.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2212 |
+
"model.visual.blocks.2.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2213 |
+
"model.visual.blocks.20.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2214 |
+
"model.visual.blocks.20.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2215 |
+
"model.visual.blocks.20.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2216 |
+
"model.visual.blocks.20.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2217 |
+
"model.visual.blocks.20.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2218 |
+
"model.visual.blocks.20.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2219 |
+
"model.visual.blocks.20.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2220 |
+
"model.visual.blocks.20.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2221 |
+
"model.visual.blocks.20.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2222 |
+
"model.visual.blocks.20.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2223 |
+
"model.visual.blocks.20.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2224 |
+
"model.visual.blocks.20.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2225 |
+
"model.visual.blocks.21.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2226 |
+
"model.visual.blocks.21.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2227 |
+
"model.visual.blocks.21.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2228 |
+
"model.visual.blocks.21.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2229 |
+
"model.visual.blocks.21.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2230 |
+
"model.visual.blocks.21.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2231 |
+
"model.visual.blocks.21.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2232 |
+
"model.visual.blocks.21.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2233 |
+
"model.visual.blocks.21.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2234 |
+
"model.visual.blocks.21.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2235 |
+
"model.visual.blocks.21.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2236 |
+
"model.visual.blocks.21.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2237 |
+
"model.visual.blocks.22.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2238 |
+
"model.visual.blocks.22.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2239 |
+
"model.visual.blocks.22.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2240 |
+
"model.visual.blocks.22.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2241 |
+
"model.visual.blocks.22.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2242 |
+
"model.visual.blocks.22.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2243 |
+
"model.visual.blocks.22.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2244 |
+
"model.visual.blocks.22.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2245 |
+
"model.visual.blocks.22.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2246 |
+
"model.visual.blocks.22.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2247 |
+
"model.visual.blocks.22.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2248 |
+
"model.visual.blocks.22.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2249 |
+
"model.visual.blocks.23.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2250 |
+
"model.visual.blocks.23.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2251 |
+
"model.visual.blocks.23.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2252 |
+
"model.visual.blocks.23.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2253 |
+
"model.visual.blocks.23.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2254 |
+
"model.visual.blocks.23.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2255 |
+
"model.visual.blocks.23.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2256 |
+
"model.visual.blocks.23.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2257 |
+
"model.visual.blocks.23.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2258 |
+
"model.visual.blocks.23.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2259 |
+
"model.visual.blocks.23.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2260 |
+
"model.visual.blocks.23.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2261 |
+
"model.visual.blocks.24.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2262 |
+
"model.visual.blocks.24.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2263 |
+
"model.visual.blocks.24.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2264 |
+
"model.visual.blocks.24.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2265 |
+
"model.visual.blocks.24.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2266 |
+
"model.visual.blocks.24.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2267 |
+
"model.visual.blocks.24.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2268 |
+
"model.visual.blocks.24.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2269 |
+
"model.visual.blocks.24.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2270 |
+
"model.visual.blocks.24.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2271 |
+
"model.visual.blocks.24.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2272 |
+
"model.visual.blocks.24.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2273 |
+
"model.visual.blocks.25.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2274 |
+
"model.visual.blocks.25.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2275 |
+
"model.visual.blocks.25.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2276 |
+
"model.visual.blocks.25.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2277 |
+
"model.visual.blocks.25.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2278 |
+
"model.visual.blocks.25.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2279 |
+
"model.visual.blocks.25.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2280 |
+
"model.visual.blocks.25.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2281 |
+
"model.visual.blocks.25.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2282 |
+
"model.visual.blocks.25.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2283 |
+
"model.visual.blocks.25.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2284 |
+
"model.visual.blocks.25.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2285 |
+
"model.visual.blocks.26.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2286 |
+
"model.visual.blocks.26.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2287 |
+
"model.visual.blocks.26.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2288 |
+
"model.visual.blocks.26.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2289 |
+
"model.visual.blocks.26.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2290 |
+
"model.visual.blocks.26.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2291 |
+
"model.visual.blocks.26.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2292 |
+
"model.visual.blocks.26.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2293 |
+
"model.visual.blocks.26.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2294 |
+
"model.visual.blocks.26.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2295 |
+
"model.visual.blocks.26.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2296 |
+
"model.visual.blocks.26.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2297 |
+
"model.visual.blocks.3.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2298 |
+
"model.visual.blocks.3.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2299 |
+
"model.visual.blocks.3.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2300 |
+
"model.visual.blocks.3.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2301 |
+
"model.visual.blocks.3.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2302 |
+
"model.visual.blocks.3.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2303 |
+
"model.visual.blocks.3.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2304 |
+
"model.visual.blocks.3.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2305 |
+
"model.visual.blocks.3.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2306 |
+
"model.visual.blocks.3.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2307 |
+
"model.visual.blocks.3.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2308 |
+
"model.visual.blocks.3.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2309 |
+
"model.visual.blocks.4.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2310 |
+
"model.visual.blocks.4.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2311 |
+
"model.visual.blocks.4.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2312 |
+
"model.visual.blocks.4.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2313 |
+
"model.visual.blocks.4.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2314 |
+
"model.visual.blocks.4.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2315 |
+
"model.visual.blocks.4.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2316 |
+
"model.visual.blocks.4.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2317 |
+
"model.visual.blocks.4.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2318 |
+
"model.visual.blocks.4.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2319 |
+
"model.visual.blocks.4.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2320 |
+
"model.visual.blocks.4.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2321 |
+
"model.visual.blocks.5.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2322 |
+
"model.visual.blocks.5.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2323 |
+
"model.visual.blocks.5.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2324 |
+
"model.visual.blocks.5.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2325 |
+
"model.visual.blocks.5.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2326 |
+
"model.visual.blocks.5.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2327 |
+
"model.visual.blocks.5.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2328 |
+
"model.visual.blocks.5.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2329 |
+
"model.visual.blocks.5.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2330 |
+
"model.visual.blocks.5.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2331 |
+
"model.visual.blocks.5.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2332 |
+
"model.visual.blocks.5.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2333 |
+
"model.visual.blocks.6.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2334 |
+
"model.visual.blocks.6.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2335 |
+
"model.visual.blocks.6.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2336 |
+
"model.visual.blocks.6.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2337 |
+
"model.visual.blocks.6.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2338 |
+
"model.visual.blocks.6.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2339 |
+
"model.visual.blocks.6.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2340 |
+
"model.visual.blocks.6.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2341 |
+
"model.visual.blocks.6.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2342 |
+
"model.visual.blocks.6.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2343 |
+
"model.visual.blocks.6.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2344 |
+
"model.visual.blocks.6.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2345 |
+
"model.visual.blocks.7.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2346 |
+
"model.visual.blocks.7.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2347 |
+
"model.visual.blocks.7.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2348 |
+
"model.visual.blocks.7.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2349 |
+
"model.visual.blocks.7.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2350 |
+
"model.visual.blocks.7.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2351 |
+
"model.visual.blocks.7.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2352 |
+
"model.visual.blocks.7.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2353 |
+
"model.visual.blocks.7.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2354 |
+
"model.visual.blocks.7.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2355 |
+
"model.visual.blocks.7.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2356 |
+
"model.visual.blocks.7.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2357 |
+
"model.visual.blocks.8.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2358 |
+
"model.visual.blocks.8.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2359 |
+
"model.visual.blocks.8.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2360 |
+
"model.visual.blocks.8.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2361 |
+
"model.visual.blocks.8.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2362 |
+
"model.visual.blocks.8.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2363 |
+
"model.visual.blocks.8.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2364 |
+
"model.visual.blocks.8.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2365 |
+
"model.visual.blocks.8.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2366 |
+
"model.visual.blocks.8.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2367 |
+
"model.visual.blocks.8.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2368 |
+
"model.visual.blocks.8.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2369 |
+
"model.visual.blocks.9.attn.proj.bias": "visual_mtp_weights.safetensors",
|
| 2370 |
+
"model.visual.blocks.9.attn.proj.weight": "visual_mtp_weights.safetensors",
|
| 2371 |
+
"model.visual.blocks.9.attn.qkv.bias": "visual_mtp_weights.safetensors",
|
| 2372 |
+
"model.visual.blocks.9.attn.qkv.weight": "visual_mtp_weights.safetensors",
|
| 2373 |
+
"model.visual.blocks.9.mlp.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2374 |
+
"model.visual.blocks.9.mlp.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2375 |
+
"model.visual.blocks.9.mlp.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2376 |
+
"model.visual.blocks.9.mlp.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2377 |
+
"model.visual.blocks.9.norm1.bias": "visual_mtp_weights.safetensors",
|
| 2378 |
+
"model.visual.blocks.9.norm1.weight": "visual_mtp_weights.safetensors",
|
| 2379 |
+
"model.visual.blocks.9.norm2.bias": "visual_mtp_weights.safetensors",
|
| 2380 |
+
"model.visual.blocks.9.norm2.weight": "visual_mtp_weights.safetensors",
|
| 2381 |
+
"model.visual.merger.linear_fc1.bias": "visual_mtp_weights.safetensors",
|
| 2382 |
+
"model.visual.merger.linear_fc1.weight": "visual_mtp_weights.safetensors",
|
| 2383 |
+
"model.visual.merger.linear_fc2.bias": "visual_mtp_weights.safetensors",
|
| 2384 |
+
"model.visual.merger.linear_fc2.weight": "visual_mtp_weights.safetensors",
|
| 2385 |
+
"model.visual.merger.norm.bias": "visual_mtp_weights.safetensors",
|
| 2386 |
+
"model.visual.merger.norm.weight": "visual_mtp_weights.safetensors",
|
| 2387 |
+
"model.visual.patch_embed.proj.bias": "visual_mtp_weights.safetensors",
|
| 2388 |
+
"model.visual.patch_embed.proj.weight": "visual_mtp_weights.safetensors",
|
| 2389 |
+
"model.visual.pos_embed.weight": "visual_mtp_weights.safetensors",
|
| 2390 |
+
"mtp.fc.weight": "visual_mtp_weights.safetensors",
|
| 2391 |
+
"mtp.layers.0.input_layernorm.weight": "visual_mtp_weights.safetensors",
|
| 2392 |
+
"mtp.layers.0.mlp.down_proj.weight": "visual_mtp_weights.safetensors",
|
| 2393 |
+
"mtp.layers.0.mlp.gate_proj.weight": "visual_mtp_weights.safetensors",
|
| 2394 |
+
"mtp.layers.0.mlp.up_proj.weight": "visual_mtp_weights.safetensors",
|
| 2395 |
+
"mtp.layers.0.post_attention_layernorm.weight": "visual_mtp_weights.safetensors",
|
| 2396 |
+
"mtp.layers.0.self_attn.k_norm.weight": "visual_mtp_weights.safetensors",
|
| 2397 |
+
"mtp.layers.0.self_attn.k_proj.weight": "visual_mtp_weights.safetensors",
|
| 2398 |
+
"mtp.layers.0.self_attn.o_proj.weight": "visual_mtp_weights.safetensors",
|
| 2399 |
+
"mtp.layers.0.self_attn.q_norm.weight": "visual_mtp_weights.safetensors",
|
| 2400 |
+
"mtp.layers.0.self_attn.q_proj.weight": "visual_mtp_weights.safetensors",
|
| 2401 |
+
"mtp.layers.0.self_attn.v_proj.weight": "visual_mtp_weights.safetensors",
|
| 2402 |
+
"mtp.norm.weight": "visual_mtp_weights.safetensors",
|
| 2403 |
+
"mtp.pre_fc_norm_embedding.weight": "visual_mtp_weights.safetensors",
|
| 2404 |
+
"mtp.pre_fc_norm_hidden.weight": "visual_mtp_weights.safetensors"
|
| 2405 |
}
|
| 2406 |
}
|
preprocessor_config.json
CHANGED
|
@@ -1,21 +1,28 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.48145466,
|
| 8 |
+
0.4578275,
|
| 9 |
+
0.40821073
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "Qwen3VLImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.26862954,
|
| 14 |
+
0.26130258,
|
| 15 |
+
0.27577711
|
| 16 |
+
],
|
| 17 |
+
"max_pixels": 16384,
|
| 18 |
+
"min_pixels": 256,
|
| 19 |
+
"patch_size": 16,
|
| 20 |
+
"processor_class": "Qwen3VLProcessor",
|
| 21 |
+
"rescale_factor": 0.00392156862745098,
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"size": {
|
| 24 |
+
"shortest_edge": 384
|
| 25 |
+
},
|
| 26 |
+
"spatial_merge_size": 2,
|
| 27 |
+
"temporal_patch_size": 2
|
| 28 |
}
|
processor_config.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_processor": {
|
| 3 |
+
"data_format": "channels_first",
|
| 4 |
+
"do_convert_rgb": true,
|
| 5 |
+
"do_normalize": true,
|
| 6 |
+
"do_rescale": true,
|
| 7 |
+
"do_resize": true,
|
| 8 |
+
"image_mean": [
|
| 9 |
+
0.5,
|
| 10 |
+
0.5,
|
| 11 |
+
0.5
|
| 12 |
+
],
|
| 13 |
+
"image_processor_type": "Qwen2VLImageProcessorFast",
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.5,
|
| 16 |
+
0.5,
|
| 17 |
+
0.5
|
| 18 |
+
],
|
| 19 |
+
"merge_size": 2,
|
| 20 |
+
"patch_size": 16,
|
| 21 |
+
"resample": 3,
|
| 22 |
+
"rescale_factor": 0.00392156862745098,
|
| 23 |
+
"size": {
|
| 24 |
+
"longest_edge": 16777216,
|
| 25 |
+
"shortest_edge": 65536
|
| 26 |
+
},
|
| 27 |
+
"temporal_patch_size": 2
|
| 28 |
+
},
|
| 29 |
+
"processor_class": "Qwen3VLProcessor",
|
| 30 |
+
"video_processor": {
|
| 31 |
+
"data_format": "channels_first",
|
| 32 |
+
"default_to_square": true,
|
| 33 |
+
"do_convert_rgb": true,
|
| 34 |
+
"do_normalize": true,
|
| 35 |
+
"do_rescale": true,
|
| 36 |
+
"do_resize": true,
|
| 37 |
+
"do_sample_frames": true,
|
| 38 |
+
"fps": 2,
|
| 39 |
+
"image_mean": [
|
| 40 |
+
0.5,
|
| 41 |
+
0.5,
|
| 42 |
+
0.5
|
| 43 |
+
],
|
| 44 |
+
"image_std": [
|
| 45 |
+
0.5,
|
| 46 |
+
0.5,
|
| 47 |
+
0.5
|
| 48 |
+
],
|
| 49 |
+
"max_frames": 768,
|
| 50 |
+
"merge_size": 2,
|
| 51 |
+
"min_frames": 4,
|
| 52 |
+
"patch_size": 16,
|
| 53 |
+
"resample": 3,
|
| 54 |
+
"rescale_factor": 0.00392156862745098,
|
| 55 |
+
"return_metadata": false,
|
| 56 |
+
"size": {
|
| 57 |
+
"longest_edge": 25165824,
|
| 58 |
+
"shortest_edge": 4096
|
| 59 |
+
},
|
| 60 |
+
"temporal_patch_size": 2,
|
| 61 |
+
"video_processor_type": "Qwen3VLVideoProcessor"
|
| 62 |
+
}
|
| 63 |
+
}
|
quant_log.csv
DELETED
|
@@ -1,401 +0,0 @@
|
|
| 1 |
-
layer,module,loss,samples,damp,time
|
| 2 |
-
0,linear_attn.in_proj_qkv,0.0005375139,0.05000,4.042
|
| 3 |
-
0,linear_attn.in_proj_z,0.0003515709,0.05000,3.858
|
| 4 |
-
0,linear_attn.out_proj,0.0000002353,0.05000,4.637
|
| 5 |
-
0,mlp.gate_proj,0.0000034350,0.05000,7.864
|
| 6 |
-
0,mlp.up_proj,0.0000031435,0.05000,7.864
|
| 7 |
-
0,mlp.down_proj,0.0000000338,0.05000,14.103
|
| 8 |
-
1,linear_attn.in_proj_qkv,0.0000247697,0.05000,4.126
|
| 9 |
-
1,linear_attn.in_proj_z,0.0000152088,0.05000,3.910
|
| 10 |
-
1,linear_attn.out_proj,0.0000000828,0.05000,4.622
|
| 11 |
-
1,mlp.gate_proj,0.0000076606,0.05000,7.864
|
| 12 |
-
1,mlp.up_proj,0.0000072644,0.05000,7.887
|
| 13 |
-
1,mlp.down_proj,0.0000000610,0.05000,13.550
|
| 14 |
-
2,linear_attn.in_proj_qkv,0.0000388341,0.05000,3.911
|
| 15 |
-
2,linear_attn.in_proj_z,0.0000234090,0.05000,3.761
|
| 16 |
-
2,linear_attn.out_proj,0.0000001344,0.05000,4.565
|
| 17 |
-
2,mlp.gate_proj,0.0000142704,0.05000,7.359
|
| 18 |
-
2,mlp.up_proj,0.0000133833,0.05000,7.422
|
| 19 |
-
2,mlp.down_proj,0.0000002395,0.05000,13.392
|
| 20 |
-
3,self_attn.v_proj,0.0000288434,0.05000,12.132
|
| 21 |
-
3,self_attn.k_proj,0.0000296158,0.05000,12.167
|
| 22 |
-
3,self_attn.q_proj,0.0003874665,0.05000,12.179
|
| 23 |
-
3,self_attn.o_proj,0.0000001120,0.05000,4.360
|
| 24 |
-
3,mlp.gate_proj,0.0000190095,0.05000,7.313
|
| 25 |
-
3,mlp.up_proj,0.0000182555,0.05000,7.426
|
| 26 |
-
3,mlp.down_proj,0.0000002130,0.05000,13.499
|
| 27 |
-
4,linear_attn.in_proj_qkv,0.0000622010,0.05000,3.933
|
| 28 |
-
4,linear_attn.in_proj_z,0.0000397953,0.05000,3.702
|
| 29 |
-
4,linear_attn.out_proj,0.0000002836,0.05000,4.359
|
| 30 |
-
4,mlp.gate_proj,0.0000268327,0.05000,7.478
|
| 31 |
-
4,mlp.up_proj,0.0000257800,0.05000,7.513
|
| 32 |
-
4,mlp.down_proj,0.0000003252,0.05000,13.431
|
| 33 |
-
5,linear_attn.in_proj_qkv,0.0000803550,0.05000,3.904
|
| 34 |
-
5,linear_attn.in_proj_z,0.0000517575,0.05000,3.733
|
| 35 |
-
5,linear_attn.out_proj,0.0000003582,0.05000,4.399
|
| 36 |
-
5,mlp.gate_proj,0.0000352486,0.05000,7.595
|
| 37 |
-
5,mlp.up_proj,0.0000331223,0.05000,7.608
|
| 38 |
-
5,mlp.down_proj,0.0000005086,0.05000,13.313
|
| 39 |
-
6,linear_attn.in_proj_qkv,0.0001166319,0.05000,3.919
|
| 40 |
-
6,linear_attn.in_proj_z,0.0000688981,0.05000,3.812
|
| 41 |
-
6,linear_attn.out_proj,0.0000005175,0.05000,4.370
|
| 42 |
-
6,mlp.up_proj,0.0000443727,0.05000,7.373
|
| 43 |
-
6,mlp.gate_proj,0.0000479484,0.05000,7.446
|
| 44 |
-
6,mlp.down_proj,0.0000008847,0.05000,13.296
|
| 45 |
-
7,self_attn.v_proj,0.0000276241,0.05000,12.092
|
| 46 |
-
7,self_attn.k_proj,0.0000305602,0.05000,12.200
|
| 47 |
-
7,self_attn.q_proj,0.0003577193,0.05000,12.205
|
| 48 |
-
7,self_attn.o_proj,0.0000003202,0.05000,4.352
|
| 49 |
-
7,mlp.gate_proj,0.0000549339,0.05000,7.342
|
| 50 |
-
7,mlp.up_proj,0.0000510183,0.05000,7.372
|
| 51 |
-
7,mlp.down_proj,0.0000010120,0.05000,13.295
|
| 52 |
-
8,linear_attn.in_proj_qkv,0.0001316477,0.05000,4.026
|
| 53 |
-
8,linear_attn.in_proj_z,0.0000825992,0.05000,3.719
|
| 54 |
-
8,linear_attn.out_proj,0.0000007603,0.05000,4.337
|
| 55 |
-
8,mlp.up_proj,0.0000569455,0.05000,7.281
|
| 56 |
-
8,mlp.gate_proj,0.0000608236,0.05000,7.330
|
| 57 |
-
8,mlp.down_proj,0.0000011534,0.05000,13.460
|
| 58 |
-
9,linear_attn.in_proj_qkv,0.0001313925,0.05000,3.824
|
| 59 |
-
9,linear_attn.in_proj_z,0.0000792323,0.05000,3.677
|
| 60 |
-
9,linear_attn.out_proj,0.0000007958,0.05000,4.393
|
| 61 |
-
9,mlp.gate_proj,0.0000662348,0.05000,7.407
|
| 62 |
-
9,mlp.up_proj,0.0000623162,0.05000,7.431
|
| 63 |
-
9,mlp.down_proj,0.0000013028,0.05000,12.976
|
| 64 |
-
10,linear_attn.in_proj_qkv,0.0001373728,0.05000,3.987
|
| 65 |
-
10,linear_attn.in_proj_z,0.0000804026,0.05000,3.729
|
| 66 |
-
10,linear_attn.out_proj,0.0000009129,0.05000,4.235
|
| 67 |
-
10,mlp.up_proj,0.0000642295,0.05000,7.260
|
| 68 |
-
10,mlp.gate_proj,0.0000682146,0.05000,7.284
|
| 69 |
-
10,mlp.down_proj,0.0000014409,0.05000,13.193
|
| 70 |
-
11,self_attn.q_proj,0.0003283735,0.05000,12.208
|
| 71 |
-
11,self_attn.k_proj,0.0000299146,0.05000,12.295
|
| 72 |
-
11,self_attn.v_proj,0.0000262362,0.05000,12.378
|
| 73 |
-
11,self_attn.o_proj,0.0000005684,0.05000,4.252
|
| 74 |
-
11,mlp.up_proj,0.0000684680,0.05000,7.379
|
| 75 |
-
11,mlp.gate_proj,0.0000719400,0.05000,7.530
|
| 76 |
-
11,mlp.down_proj,0.0000016315,0.05000,13.352
|
| 77 |
-
12,linear_attn.in_proj_qkv,0.0001524303,0.05000,4.002
|
| 78 |
-
12,linear_attn.in_proj_z,0.0000888674,0.05000,3.695
|
| 79 |
-
12,linear_attn.out_proj,0.0000013819,0.05000,4.279
|
| 80 |
-
12,mlp.up_proj,0.0000704509,0.05000,7.016
|
| 81 |
-
12,mlp.gate_proj,0.0000742550,0.05000,7.178
|
| 82 |
-
12,mlp.down_proj,0.0000018874,0.05000,13.696
|
| 83 |
-
13,linear_attn.in_proj_qkv,0.0001589462,0.05000,3.818
|
| 84 |
-
13,linear_attn.in_proj_z,0.0000881903,0.05000,3.892
|
| 85 |
-
13,linear_attn.out_proj,0.0000014786,0.05000,4.371
|
| 86 |
-
13,mlp.gate_proj,0.0000791823,0.05000,7.273
|
| 87 |
-
13,mlp.up_proj,0.0000753247,0.05000,7.327
|
| 88 |
-
13,mlp.down_proj,0.0000021395,0.05000,12.876
|
| 89 |
-
14,linear_attn.in_proj_qkv,0.0001752392,0.05000,3.947
|
| 90 |
-
14,linear_attn.in_proj_z,0.0000999004,0.05000,3.712
|
| 91 |
-
14,linear_attn.out_proj,0.0000017957,0.05000,4.309
|
| 92 |
-
14,mlp.up_proj,0.0000802380,0.05000,7.191
|
| 93 |
-
14,mlp.gate_proj,0.0000841347,0.05000,7.203
|
| 94 |
-
14,mlp.down_proj,0.0000023905,0.05000,12.554
|
| 95 |
-
15,self_attn.k_proj,0.0000265395,0.05000,12.668
|
| 96 |
-
15,self_attn.q_proj,0.0002868342,0.05000,12.781
|
| 97 |
-
15,self_attn.v_proj,0.0000250176,0.05000,12.811
|
| 98 |
-
15,self_attn.o_proj,0.0000008655,0.05000,4.403
|
| 99 |
-
15,mlp.gate_proj,0.0000938290,0.05000,7.279
|
| 100 |
-
15,mlp.up_proj,0.0000904346,0.05000,7.298
|
| 101 |
-
15,mlp.down_proj,0.0000026800,0.05000,14.304
|
| 102 |
-
16,linear_attn.in_proj_qkv,0.0001868244,0.05000,4.146
|
| 103 |
-
16,linear_attn.in_proj_z,0.0001051968,0.05000,4.040
|
| 104 |
-
16,linear_attn.out_proj,0.0000020658,0.05000,4.767
|
| 105 |
-
16,mlp.gate_proj,0.0001032226,0.05000,7.769
|
| 106 |
-
16,mlp.up_proj,0.0000986931,0.05000,7.987
|
| 107 |
-
16,mlp.down_proj,0.0000031492,0.05000,14.390
|
| 108 |
-
17,linear_attn.in_proj_qkv,0.0002118973,0.05000,4.194
|
| 109 |
-
17,linear_attn.in_proj_z,0.0001054912,0.05000,4.160
|
| 110 |
-
17,linear_attn.out_proj,0.0000026375,0.05000,4.668
|
| 111 |
-
17,mlp.gate_proj,0.0001171276,0.05000,7.854
|
| 112 |
-
17,mlp.up_proj,0.0001119881,0.05000,7.933
|
| 113 |
-
17,mlp.down_proj,0.0000041048,0.05000,14.375
|
| 114 |
-
18,linear_attn.in_proj_qkv,0.0002268285,0.05000,4.154
|
| 115 |
-
18,linear_attn.in_proj_z,0.0001231153,0.05000,4.346
|
| 116 |
-
18,linear_attn.out_proj,0.0000030864,0.05000,4.998
|
| 117 |
-
18,mlp.gate_proj,0.0001437140,0.05000,8.275
|
| 118 |
-
18,mlp.up_proj,0.0001326707,0.05000,8.345
|
| 119 |
-
18,mlp.down_proj,0.0000066838,0.05000,15.096
|
| 120 |
-
19,self_attn.k_proj,0.0000351372,0.05000,13.040
|
| 121 |
-
19,self_attn.v_proj,0.0000401050,0.05000,13.295
|
| 122 |
-
19,self_attn.q_proj,0.0003581474,0.05000,13.342
|
| 123 |
-
19,self_attn.o_proj,0.0000033805,0.05000,4.513
|
| 124 |
-
19,mlp.up_proj,0.0001578480,0.05000,7.428
|
| 125 |
-
19,mlp.gate_proj,0.0001676099,0.05000,7.521
|
| 126 |
-
19,mlp.down_proj,0.0000081858,0.05000,13.584
|
| 127 |
-
20,linear_attn.in_proj_qkv,0.0003857003,0.05000,4.143
|
| 128 |
-
20,linear_attn.in_proj_z,0.0002126070,0.05000,3.825
|
| 129 |
-
20,linear_attn.out_proj,0.0000042739,0.05000,4.365
|
| 130 |
-
20,mlp.gate_proj,0.0001995256,0.05000,7.309
|
| 131 |
-
20,mlp.up_proj,0.0001841119,0.05000,7.360
|
| 132 |
-
20,mlp.down_proj,0.0000094405,0.05000,13.068
|
| 133 |
-
21,linear_attn.in_proj_qkv,0.0004770911,0.05000,4.062
|
| 134 |
-
21,linear_attn.in_proj_z,0.0002983586,0.05000,3.838
|
| 135 |
-
21,linear_attn.out_proj,0.0000051978,0.05000,4.865
|
| 136 |
-
21,mlp.gate_proj,0.0002137620,0.05000,8.588
|
| 137 |
-
21,mlp.up_proj,0.0001987904,0.05000,8.667
|
| 138 |
-
21,mlp.down_proj,0.0000097025,0.05000,14.692
|
| 139 |
-
22,linear_attn.in_proj_qkv,0.0004051315,0.05000,4.037
|
| 140 |
-
22,linear_attn.in_proj_z,0.0002439546,0.05000,3.862
|
| 141 |
-
22,linear_attn.out_proj,0.0000042765,0.05000,4.544
|
| 142 |
-
22,mlp.gate_proj,0.0002479232,0.05000,7.690
|
| 143 |
-
22,mlp.up_proj,0.0002207545,0.05000,7.696
|
| 144 |
-
22,mlp.down_proj,0.0000105950,0.05000,13.732
|
| 145 |
-
23,self_attn.k_proj,0.0000463970,0.05000,12.129
|
| 146 |
-
23,self_attn.q_proj,0.0004317670,0.05000,12.218
|
| 147 |
-
23,self_attn.v_proj,0.0000474314,0.05000,12.276
|
| 148 |
-
23,self_attn.o_proj,0.0000039161,0.05000,4.454
|
| 149 |
-
23,mlp.gate_proj,0.0002434690,0.05000,4.061
|
| 150 |
-
23,mlp.up_proj,0.0002234220,0.05000,4.068
|
| 151 |
-
23,mlp.down_proj,0.0000098514,0.05000,6.163
|
| 152 |
-
24,linear_attn.in_proj_qkv,0.0003746832,0.05000,1.630
|
| 153 |
-
24,linear_attn.in_proj_z,0.0002085239,0.05000,1.935
|
| 154 |
-
24,linear_attn.out_proj,0.0000055183,0.05000,1.881
|
| 155 |
-
24,mlp.up_proj,0.0002386453,0.05000,2.835
|
| 156 |
-
24,mlp.gate_proj,0.0002611436,0.05000,2.846
|
| 157 |
-
24,mlp.down_proj,0.0000110752,0.05000,6.217
|
| 158 |
-
25,linear_attn.in_proj_qkv,0.0003768635,0.05000,1.658
|
| 159 |
-
25,linear_attn.in_proj_z,0.0001953509,0.05000,1.896
|
| 160 |
-
25,linear_attn.out_proj,0.0000056296,0.05000,1.866
|
| 161 |
-
25,mlp.up_proj,0.0002393584,0.05000,2.809
|
| 162 |
-
25,mlp.gate_proj,0.0002474422,0.05000,2.837
|
| 163 |
-
25,mlp.down_proj,0.0000109050,0.05000,5.939
|
| 164 |
-
26,linear_attn.in_proj_qkv,0.0003443491,0.05000,1.629
|
| 165 |
-
26,linear_attn.in_proj_z,0.0001705994,0.05000,1.761
|
| 166 |
-
26,linear_attn.out_proj,0.0000062545,0.05000,1.777
|
| 167 |
-
26,mlp.up_proj,0.0002370522,0.05000,2.612
|
| 168 |
-
26,mlp.gate_proj,0.0002310437,0.05000,2.628
|
| 169 |
-
26,mlp.down_proj,0.0000114066,0.05000,14.123
|
| 170 |
-
27,self_attn.v_proj,0.0000367863,0.05000,13.241
|
| 171 |
-
27,self_attn.q_proj,0.0003905871,0.05000,13.273
|
| 172 |
-
27,self_attn.k_proj,0.0000425683,0.05000,13.340
|
| 173 |
-
27,self_attn.o_proj,0.0000076879,0.05000,4.871
|
| 174 |
-
27,mlp.up_proj,0.0002445872,0.05000,8.179
|
| 175 |
-
27,mlp.gate_proj,0.0002310220,0.05000,8.230
|
| 176 |
-
27,mlp.down_proj,0.0000117096,0.05000,14.592
|
| 177 |
-
28,linear_attn.in_proj_qkv,0.0003290252,0.05000,4.262
|
| 178 |
-
28,linear_attn.in_proj_z,0.0001524443,0.05000,4.042
|
| 179 |
-
28,linear_attn.out_proj,0.0000077834,0.05000,4.709
|
| 180 |
-
28,mlp.up_proj,0.0002347492,0.05000,8.001
|
| 181 |
-
28,mlp.gate_proj,0.0002179738,0.05000,8.055
|
| 182 |
-
28,mlp.down_proj,0.0000116015,0.05000,14.366
|
| 183 |
-
29,linear_attn.in_proj_qkv,0.0003474303,0.05000,4.223
|
| 184 |
-
29,linear_attn.in_proj_z,0.0001582735,0.05000,3.978
|
| 185 |
-
29,linear_attn.out_proj,0.0000067616,0.05000,4.703
|
| 186 |
-
29,mlp.up_proj,0.0002339276,0.05000,7.818
|
| 187 |
-
29,mlp.gate_proj,0.0002158054,0.05000,7.938
|
| 188 |
-
29,mlp.down_proj,0.0000115375,0.05000,14.244
|
| 189 |
-
30,linear_attn.in_proj_qkv,0.0003500992,0.05000,4.178
|
| 190 |
-
30,linear_attn.in_proj_z,0.0001629830,0.05000,3.946
|
| 191 |
-
30,linear_attn.out_proj,0.0000088357,0.05000,4.402
|
| 192 |
-
30,mlp.up_proj,0.0002370281,0.05000,7.381
|
| 193 |
-
30,mlp.gate_proj,0.0002136254,0.05000,7.390
|
| 194 |
-
30,mlp.down_proj,0.0000118369,0.05000,13.323
|
| 195 |
-
31,self_attn.q_proj,0.0003585615,0.05000,12.437
|
| 196 |
-
31,self_attn.v_proj,0.0000437309,0.05000,12.497
|
| 197 |
-
31,self_attn.k_proj,0.0000397583,0.05000,12.511
|
| 198 |
-
31,self_attn.o_proj,0.0000089746,0.05000,4.354
|
| 199 |
-
31,mlp.up_proj,0.0002537108,0.05000,7.347
|
| 200 |
-
31,mlp.gate_proj,0.0002248938,0.05000,7.439
|
| 201 |
-
31,mlp.down_proj,0.0000123573,0.05000,13.377
|
| 202 |
-
32,linear_attn.in_proj_qkv,0.0003709294,0.05000,3.939
|
| 203 |
-
32,linear_attn.in_proj_z,0.0001609685,0.05000,3.698
|
| 204 |
-
32,linear_attn.out_proj,0.0000087306,0.05000,4.326
|
| 205 |
-
32,mlp.up_proj,0.0002673114,0.05000,7.415
|
| 206 |
-
32,mlp.gate_proj,0.0002366684,0.05000,7.490
|
| 207 |
-
32,mlp.down_proj,0.0000130803,0.05000,13.673
|
| 208 |
-
33,linear_attn.in_proj_qkv,0.0004365302,0.05000,4.096
|
| 209 |
-
33,linear_attn.in_proj_z,0.0001785530,0.05000,3.702
|
| 210 |
-
33,linear_attn.out_proj,0.0000097492,0.05000,4.057
|
| 211 |
-
33,mlp.gate_proj,0.0002480511,0.05000,7.279
|
| 212 |
-
33,mlp.up_proj,0.0002804281,0.05000,7.308
|
| 213 |
-
33,mlp.down_proj,0.0000136580,0.05000,13.334
|
| 214 |
-
34,linear_attn.in_proj_qkv,0.0004258669,0.05000,3.831
|
| 215 |
-
34,linear_attn.in_proj_z,0.0001928501,0.05000,3.721
|
| 216 |
-
34,linear_attn.out_proj,0.0000102707,0.05000,4.442
|
| 217 |
-
34,mlp.up_proj,0.0003019740,0.05000,7.321
|
| 218 |
-
34,mlp.gate_proj,0.0002771344,0.05000,7.366
|
| 219 |
-
34,mlp.down_proj,0.0000191384,0.05000,13.280
|
| 220 |
-
35,self_attn.v_proj,0.0000684588,0.05000,11.710
|
| 221 |
-
35,self_attn.k_proj,0.0000486297,0.05000,11.843
|
| 222 |
-
35,self_attn.q_proj,0.0004456763,0.05000,11.909
|
| 223 |
-
35,self_attn.o_proj,0.0000145637,0.05000,4.427
|
| 224 |
-
35,mlp.gate_proj,0.0002873185,0.05000,7.143
|
| 225 |
-
35,mlp.up_proj,0.0003080965,0.05000,7.252
|
| 226 |
-
35,mlp.down_proj,0.0000221059,0.05000,13.145
|
| 227 |
-
36,linear_attn.in_proj_qkv,0.0006038611,0.05000,3.909
|
| 228 |
-
36,linear_attn.in_proj_z,0.0003008038,0.05000,3.728
|
| 229 |
-
36,linear_attn.out_proj,0.0000098270,0.05000,4.396
|
| 230 |
-
36,mlp.up_proj,0.0003041932,0.05000,7.369
|
| 231 |
-
36,mlp.gate_proj,0.0003187759,0.05000,7.375
|
| 232 |
-
36,mlp.down_proj,0.0000210562,0.05000,13.447
|
| 233 |
-
37,linear_attn.in_proj_qkv,0.0006587347,0.05000,3.827
|
| 234 |
-
37,linear_attn.in_proj_z,0.0003705355,0.05000,3.655
|
| 235 |
-
37,linear_attn.out_proj,0.0000103849,0.05000,4.382
|
| 236 |
-
37,mlp.up_proj,0.0003054113,0.05000,7.238
|
| 237 |
-
37,mlp.gate_proj,0.0003235339,0.05000,7.413
|
| 238 |
-
37,mlp.down_proj,0.0000197475,0.05000,13.285
|
| 239 |
-
38,linear_attn.in_proj_qkv,0.0005866436,0.05000,3.831
|
| 240 |
-
38,linear_attn.in_proj_z,0.0003399069,0.05000,3.728
|
| 241 |
-
38,linear_attn.out_proj,0.0000070955,0.05000,4.388
|
| 242 |
-
38,mlp.up_proj,0.0003502550,0.05000,7.342
|
| 243 |
-
38,mlp.gate_proj,0.0004350511,0.05000,7.477
|
| 244 |
-
38,mlp.down_proj,0.0000197556,0.05000,13.336
|
| 245 |
-
39,self_attn.v_proj,0.0000690332,0.05000,11.787
|
| 246 |
-
39,self_attn.q_proj,0.0005133493,0.05000,11.855
|
| 247 |
-
39,self_attn.k_proj,0.0000577507,0.05000,11.959
|
| 248 |
-
39,self_attn.o_proj,0.0000081428,0.05000,4.310
|
| 249 |
-
39,mlp.up_proj,0.0003293339,0.05000,7.327
|
| 250 |
-
39,mlp.gate_proj,0.0003795339,0.05000,7.377
|
| 251 |
-
39,mlp.down_proj,0.0000167872,0.05000,13.394
|
| 252 |
-
40,linear_attn.in_proj_qkv,0.0005513974,0.05000,3.892
|
| 253 |
-
40,linear_attn.in_proj_z,0.0003037444,0.05000,3.709
|
| 254 |
-
40,linear_attn.out_proj,0.0000087935,0.05000,4.374
|
| 255 |
-
40,mlp.gate_proj,0.0004035835,0.05000,7.251
|
| 256 |
-
40,mlp.up_proj,0.0003409987,0.05000,7.300
|
| 257 |
-
40,mlp.down_proj,0.0000168670,0.05000,13.312
|
| 258 |
-
41,linear_attn.in_proj_qkv,0.0004881215,0.05000,3.890
|
| 259 |
-
41,linear_attn.in_proj_z,0.0002671910,0.05000,3.763
|
| 260 |
-
41,linear_attn.out_proj,0.0000075667,0.05000,4.354
|
| 261 |
-
41,mlp.gate_proj,0.0003435877,0.05000,7.206
|
| 262 |
-
41,mlp.up_proj,0.0003245952,0.05000,7.311
|
| 263 |
-
41,mlp.down_proj,0.0000161043,0.05000,13.384
|
| 264 |
-
42,linear_attn.in_proj_qkv,0.0004569151,0.05000,3.859
|
| 265 |
-
42,linear_attn.in_proj_z,0.0002390392,0.05000,3.677
|
| 266 |
-
42,linear_attn.out_proj,0.0000090846,0.05000,4.343
|
| 267 |
-
42,mlp.gate_proj,0.0003117448,0.05000,7.278
|
| 268 |
-
42,mlp.up_proj,0.0003180992,0.05000,7.370
|
| 269 |
-
42,mlp.down_proj,0.0000171017,0.05000,13.210
|
| 270 |
-
43,self_attn.k_proj,0.0000566166,0.05000,11.748
|
| 271 |
-
43,self_attn.q_proj,0.0004798718,0.05000,11.838
|
| 272 |
-
43,self_attn.v_proj,0.0000652786,0.05000,11.869
|
| 273 |
-
43,self_attn.o_proj,0.0000125740,0.05000,4.373
|
| 274 |
-
43,mlp.gate_proj,0.0003006564,0.05000,7.244
|
| 275 |
-
43,mlp.up_proj,0.0003207947,0.05000,7.330
|
| 276 |
-
43,mlp.down_proj,0.0000184054,0.05000,13.209
|
| 277 |
-
44,linear_attn.in_proj_qkv,0.0004242027,0.05000,3.969
|
| 278 |
-
44,linear_attn.in_proj_z,0.0002068353,0.05000,3.729
|
| 279 |
-
44,linear_attn.out_proj,0.0000130579,0.05000,4.383
|
| 280 |
-
44,mlp.gate_proj,0.0002931194,0.05000,7.492
|
| 281 |
-
44,mlp.up_proj,0.0003164854,0.05000,7.504
|
| 282 |
-
44,mlp.down_proj,0.0000208067,0.05000,13.460
|
| 283 |
-
45,linear_attn.in_proj_qkv,0.0004198022,0.05000,3.889
|
| 284 |
-
45,linear_attn.in_proj_z,0.0002044268,0.05000,3.712
|
| 285 |
-
45,linear_attn.out_proj,0.0000100431,0.05000,4.342
|
| 286 |
-
45,mlp.up_proj,0.0003182327,0.05000,7.222
|
| 287 |
-
45,mlp.gate_proj,0.0002921933,0.05000,7.309
|
| 288 |
-
45,mlp.down_proj,0.0000206568,0.05000,13.297
|
| 289 |
-
46,linear_attn.in_proj_qkv,0.0004432378,0.05000,3.811
|
| 290 |
-
46,linear_attn.in_proj_z,0.0002188176,0.05000,3.578
|
| 291 |
-
46,linear_attn.out_proj,0.0000159958,0.05000,4.362
|
| 292 |
-
46,mlp.up_proj,0.0003269873,0.05000,7.209
|
| 293 |
-
46,mlp.gate_proj,0.0002911491,0.05000,7.326
|
| 294 |
-
46,mlp.down_proj,0.0000221925,0.05000,13.306
|
| 295 |
-
47,self_attn.q_proj,0.0004898258,0.05000,11.976
|
| 296 |
-
47,self_attn.k_proj,0.0000527123,0.05000,12.000
|
| 297 |
-
47,self_attn.v_proj,0.0000802256,0.05000,12.073
|
| 298 |
-
47,self_attn.o_proj,0.0000113472,0.05000,4.347
|
| 299 |
-
47,mlp.up_proj,0.0003570259,0.05000,7.197
|
| 300 |
-
47,mlp.gate_proj,0.0003197138,0.05000,7.244
|
| 301 |
-
47,mlp.down_proj,0.0000265334,0.05000,13.358
|
| 302 |
-
48,linear_attn.in_proj_qkv,0.0004879047,0.05000,4.045
|
| 303 |
-
48,linear_attn.in_proj_z,0.0002313808,0.05000,3.735
|
| 304 |
-
48,linear_attn.out_proj,0.0000175865,0.05000,4.593
|
| 305 |
-
48,mlp.up_proj,0.0003831256,0.05000,7.891
|
| 306 |
-
48,mlp.gate_proj,0.0003445416,0.05000,7.934
|
| 307 |
-
48,mlp.down_proj,0.0000335000,0.05000,14.167
|
| 308 |
-
49,linear_attn.in_proj_qkv,0.0005885937,0.05000,4.504
|
| 309 |
-
49,linear_attn.in_proj_z,0.0002457014,0.05000,4.097
|
| 310 |
-
49,linear_attn.out_proj,0.0000237044,0.05000,4.843
|
| 311 |
-
49,mlp.gate_proj,0.0003840848,0.05000,7.974
|
| 312 |
-
49,mlp.up_proj,0.0004163043,0.05000,8.228
|
| 313 |
-
49,mlp.down_proj,0.0000451689,0.05000,14.632
|
| 314 |
-
50,linear_attn.in_proj_qkv,0.0006162443,0.05000,4.071
|
| 315 |
-
50,linear_attn.in_proj_z,0.0002804692,0.05000,3.924
|
| 316 |
-
50,linear_attn.out_proj,0.0000349245,0.05000,4.456
|
| 317 |
-
50,mlp.up_proj,0.0004862685,0.05000,7.537
|
| 318 |
-
50,mlp.gate_proj,0.0004747827,0.05000,7.751
|
| 319 |
-
50,mlp.down_proj,0.0000826267,0.05000,13.794
|
| 320 |
-
51,self_attn.v_proj,0.0001619573,0.05000,13.278
|
| 321 |
-
51,self_attn.q_proj,0.0006273309,0.05000,13.370
|
| 322 |
-
51,self_attn.k_proj,0.0000692513,0.05000,13.392
|
| 323 |
-
51,self_attn.o_proj,0.0000327376,0.05000,4.272
|
| 324 |
-
51,mlp.up_proj,0.0005253989,0.05000,6.498
|
| 325 |
-
51,mlp.gate_proj,0.0004956579,0.05000,6.636
|
| 326 |
-
51,mlp.down_proj,0.0001026185,0.05000,13.159
|
| 327 |
-
52,linear_attn.in_proj_qkv,0.0009266171,0.05000,3.825
|
| 328 |
-
52,linear_attn.in_proj_z,0.0004137491,0.05000,3.629
|
| 329 |
-
52,linear_attn.out_proj,0.0000446695,0.05000,4.312
|
| 330 |
-
52,mlp.up_proj,0.0005780998,0.05000,7.308
|
| 331 |
-
52,mlp.gate_proj,0.0006152220,0.05000,7.324
|
| 332 |
-
52,mlp.down_proj,0.0001118123,0.05000,13.300
|
| 333 |
-
53,linear_attn.in_proj_qkv,0.0009211465,0.05000,3.892
|
| 334 |
-
53,linear_attn.in_proj_z,0.0004431443,0.05000,3.709
|
| 335 |
-
53,linear_attn.out_proj,0.0000514739,0.05000,4.370
|
| 336 |
-
53,mlp.gate_proj,0.0007024802,0.05000,7.420
|
| 337 |
-
53,mlp.up_proj,0.0006290088,0.05000,7.470
|
| 338 |
-
53,mlp.down_proj,0.0001273530,0.05000,13.885
|
| 339 |
-
54,linear_attn.in_proj_qkv,0.0009073259,0.05000,4.184
|
| 340 |
-
54,linear_attn.in_proj_z,0.0004801113,0.05000,3.902
|
| 341 |
-
54,linear_attn.out_proj,0.0000492737,0.05000,4.573
|
| 342 |
-
54,mlp.gate_proj,0.0009067172,0.05000,7.734
|
| 343 |
-
54,mlp.up_proj,0.0007649405,0.05000,7.785
|
| 344 |
-
54,mlp.down_proj,0.0001912621,0.05000,13.749
|
| 345 |
-
55,self_attn.v_proj,0.0002226114,0.05000,11.937
|
| 346 |
-
55,self_attn.k_proj,0.0001067982,0.05000,12.172
|
| 347 |
-
55,self_attn.q_proj,0.0008758757,0.05000,12.200
|
| 348 |
-
55,self_attn.o_proj,0.0000413251,0.05000,4.451
|
| 349 |
-
55,mlp.gate_proj,0.0008657420,0.05000,7.637
|
| 350 |
-
55,mlp.up_proj,0.0007707832,0.05000,7.694
|
| 351 |
-
55,mlp.down_proj,0.0001647693,0.05000,13.720
|
| 352 |
-
56,linear_attn.in_proj_qkv,0.0010866812,0.05000,1.976
|
| 353 |
-
56,linear_attn.in_proj_z,0.0005330881,0.05000,1.723
|
| 354 |
-
56,linear_attn.out_proj,0.0000856800,0.05000,1.750
|
| 355 |
-
56,mlp.gate_proj,0.0009597083,0.05000,2.876
|
| 356 |
-
56,mlp.up_proj,0.0008358503,0.05000,2.896
|
| 357 |
-
56,mlp.down_proj,0.0001710205,0.05000,6.148
|
| 358 |
-
57,linear_attn.in_proj_qkv,0.0011609036,0.05000,1.614
|
| 359 |
-
57,linear_attn.in_proj_z,0.0005589580,0.05000,1.844
|
| 360 |
-
57,linear_attn.out_proj,0.0000814013,0.05000,1.834
|
| 361 |
-
57,mlp.gate_proj,0.0011157731,0.05000,2.722
|
| 362 |
-
57,mlp.up_proj,0.0009594462,0.05000,2.735
|
| 363 |
-
57,mlp.down_proj,0.0001745235,0.05000,6.145
|
| 364 |
-
58,linear_attn.in_proj_qkv,0.0011074618,0.05000,1.627
|
| 365 |
-
58,linear_attn.in_proj_z,0.0005856057,0.05000,1.876
|
| 366 |
-
58,linear_attn.out_proj,0.0000750041,0.05000,1.704
|
| 367 |
-
58,mlp.up_proj,0.0011001359,0.05000,2.567
|
| 368 |
-
58,mlp.gate_proj,0.0012845446,0.05000,2.602
|
| 369 |
-
58,mlp.down_proj,0.0002041286,0.05000,5.796
|
| 370 |
-
59,self_attn.v_proj,0.0004657917,0.05000,11.021
|
| 371 |
-
59,self_attn.k_proj,0.0001350715,0.05000,11.043
|
| 372 |
-
59,self_attn.q_proj,0.0010552907,0.05000,11.053
|
| 373 |
-
59,self_attn.o_proj,0.0001216906,0.05000,4.675
|
| 374 |
-
59,mlp.up_proj,0.0011041036,0.05000,8.476
|
| 375 |
-
59,mlp.gate_proj,0.0012555530,0.05000,8.495
|
| 376 |
-
59,mlp.down_proj,0.0002444422,0.05000,14.802
|
| 377 |
-
60,linear_attn.in_proj_qkv,0.0013573468,0.05000,4.353
|
| 378 |
-
60,linear_attn.in_proj_z,0.0006349659,0.05000,4.117
|
| 379 |
-
60,linear_attn.out_proj,0.0001707409,0.05000,4.842
|
| 380 |
-
60,mlp.up_proj,0.0011231104,0.05000,8.090
|
| 381 |
-
60,mlp.gate_proj,0.0012666935,0.05000,8.147
|
| 382 |
-
60,mlp.down_proj,0.0002962877,0.05000,14.318
|
| 383 |
-
61,linear_attn.in_proj_qkv,0.0009809255,0.05000,4.231
|
| 384 |
-
61,linear_attn.in_proj_z,0.0005509510,0.05000,4.022
|
| 385 |
-
61,linear_attn.out_proj,0.0001616595,0.05000,4.698
|
| 386 |
-
61,mlp.gate_proj,0.0013469172,0.05000,8.006
|
| 387 |
-
61,mlp.up_proj,0.0011976919,0.05000,8.052
|
| 388 |
-
61,mlp.down_proj,0.0003628120,0.05000,14.145
|
| 389 |
-
62,linear_attn.in_proj_qkv,0.0010928032,0.05000,4.179
|
| 390 |
-
62,linear_attn.in_proj_z,0.0005702188,0.05000,3.975
|
| 391 |
-
62,linear_attn.out_proj,0.0002907203,0.05000,4.675
|
| 392 |
-
62,mlp.gate_proj,0.0012338856,0.05000,7.959
|
| 393 |
-
62,mlp.up_proj,0.0011025960,0.05000,7.974
|
| 394 |
-
62,mlp.down_proj,0.0005055889,0.05000,14.051
|
| 395 |
-
63,self_attn.q_proj,0.0009252999,0.05000,11.630
|
| 396 |
-
63,self_attn.v_proj,0.0003772193,0.05000,11.671
|
| 397 |
-
63,self_attn.k_proj,0.0001241435,0.05000,11.675
|
| 398 |
-
63,self_attn.o_proj,0.0003580288,0.05000,4.633
|
| 399 |
-
63,mlp.up_proj,0.0009046681,0.05000,7.865
|
| 400 |
-
63,mlp.gate_proj,0.0010507936,0.05000,7.868
|
| 401 |
-
63,mlp.down_proj,0.0010416286,0.05000,14.094
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
quantize_config.json
CHANGED
|
@@ -25,7 +25,7 @@
|
|
| 25 |
},
|
| 26 |
"offload_to_disk": false,
|
| 27 |
"offload_to_disk_path": null,
|
| 28 |
-
"pack_impl": "
|
| 29 |
"mock_quantization": false,
|
| 30 |
"gc_mode": "interval",
|
| 31 |
"wait_for_submodule_finalizers": false,
|
|
|
|
| 25 |
},
|
| 26 |
"offload_to_disk": false,
|
| 27 |
"offload_to_disk_path": null,
|
| 28 |
+
"pack_impl": "cpu",
|
| 29 |
"mock_quantization": false,
|
| 30 |
"gc_mode": "interval",
|
| 31 |
"wait_for_submodule_finalizers": false,
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
|
| 3 |
+
size 19989343
|
tokenizer_config.json
CHANGED
|
@@ -9,7 +9,7 @@
|
|
| 9 |
"eos_token": "<|im_end|>",
|
| 10 |
"errors": "replace",
|
| 11 |
"image_token": "<|image_pad|>",
|
| 12 |
-
"is_local":
|
| 13 |
"model_max_length": 262144,
|
| 14 |
"model_specific_special_tokens": {
|
| 15 |
"audio_bos_token": "<|audio_start|>",
|
|
@@ -25,9 +25,10 @@
|
|
| 25 |
"pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
| 26 |
"processor_class": "Qwen3VLProcessor",
|
| 27 |
"split_special_tokens": false,
|
| 28 |
-
"tokenizer_class": "
|
| 29 |
"unk_token": null,
|
| 30 |
"video_token": "<|video_pad|>",
|
| 31 |
"vision_bos_token": "<|vision_start|>",
|
| 32 |
-
"vision_eos_token": "<|vision_end|>"
|
| 33 |
-
|
|
|
|
|
|
| 9 |
"eos_token": "<|im_end|>",
|
| 10 |
"errors": "replace",
|
| 11 |
"image_token": "<|image_pad|>",
|
| 12 |
+
"is_local": false,
|
| 13 |
"model_max_length": 262144,
|
| 14 |
"model_specific_special_tokens": {
|
| 15 |
"audio_bos_token": "<|audio_start|>",
|
|
|
|
| 25 |
"pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
| 26 |
"processor_class": "Qwen3VLProcessor",
|
| 27 |
"split_special_tokens": false,
|
| 28 |
+
"tokenizer_class": "TokenizersBackendFast",
|
| 29 |
"unk_token": null,
|
| 30 |
"video_token": "<|video_pad|>",
|
| 31 |
"vision_bos_token": "<|vision_start|>",
|
| 32 |
+
"vision_eos_token": "<|vision_end|>",
|
| 33 |
+
"_commit_hash": null
|
| 34 |
+
}
|
mtp.safetensors → visual_mtp_weights.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79c445390601445b28d78cff728d2cce1630310c10a5cfff8ebe08cdc503eabb
|
| 3 |
+
size 1770897616
|