Commit 380ec50 (verified) by cpatonn
Parent(s): 97f902f

Upload folder using huggingface_hub

config.json CHANGED
@@ -5,6 +5,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "decoder_sparse_step": 1,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -27,10 +28,17 @@
   "quantization_config": {
     "config_groups": {
       "group_0": {
+        "format": "pack-quantized",
         "input_activations": null,
         "output_activations": null,
         "targets": [
-          "Linear"
+          "re:.*gate_proj.*",
+          "re:.*up_proj.*",
+          "re:.*down_proj.*",
+          "re:.*k_proj.*",
+          "re:.*q_proj.*",
+          "re:.*v_proj.*",
+          "re:.*o_proj.*"
         ],
         "weights": {
           "actorder": null,
@@ -38,7 +46,7 @@
           "dynamic": false,
           "group_size": 32,
           "num_bits": 8,
-          "observer": "minmax",
+          "observer": "mse",
           "observer_kwargs": {},
           "strategy": "group",
           "symmetric": true,
@@ -101,7 +109,10 @@
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
+    "quantization_status": "compressed",
+    "sparsity_config": {},
+    "transform_config": {},
+    "version": "0.10.3.dev47+ge463fe6"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -110,8 +121,7 @@
   "shared_expert_intermediate_size": 0,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.0",
+  "transformers_version": "4.56.0.dev0",
   "use_cache": true,
   "use_qk_norm": true,
   "use_sliding_window": false,
generation_config.json CHANGED
@@ -9,5 +9,5 @@
   "temperature": 0.7,
   "top_k": 20,
   "top_p": 0.8,
-  "transformers_version": "4.55.0"
+  "transformers_version": "4.56.0.dev0"
 }
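Only the transformers_version stamp changes here; the sampling defaults (temperature 0.7, top_k 20, top_p 0.8) carry over unchanged. A usage sketch passing those defaults explicitly, reusing model and tokenizer from the loading sketch above:

inputs = tokenizer("Give me a short introduction to large language models.",
                   return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.7,  # values mirror generation_config.json
    top_k=20,
    top_p=0.8,
    max_new_tokens=256,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))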
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8f47b5f1df9e0400882a41bb091e353311bd0d32073f8c8380b176b900df160
+oid sha256:98a631b2ca427653803cb56731c6fa15c2cffac5a022cdfd9d5a1582fa15516c
 size 5000483312
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b85d385cb604730361b3140df95c0d9a214b6ef1d49b7eeead7fdf8965d7145e
+oid sha256:05552b1f2d2e6df1c7d68799067f2f00b27b6bb89f1a5c63515188541128e897
 size 5000514520
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa91412ac519b2d9f7b47ee22f6934bd1411dcd764d4a3fcb60da3048405082d
+oid sha256:52c2238e8fcb78fc7640bf2cc8d570d0d994f995535035330cc6c6f0a107de02
 size 4999987936
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de7d38892ae8716c2db64de348f458ad8e29e1c051a1cfe2dc7b56f66e7a24b3
+oid sha256:adcfee886dee742f9a759d78ca9085570e4f10b2dc89f114d68e9f18c61105b5
 size 5000518496
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7845edf2d7a924b61c8e2a68dd45db89ffe4e22824a9ec2656853d9fa1fcb15
+oid sha256:5a3e3635a9ded02f0df27e91d609e85d76e036684079ccebeeab6dd233d5c924
 size 4999987952
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8b7adfd181d9b4579aa98756d7d480baa0413bc79b105643b6b09025ac22122
+oid sha256:36aa4b438cd05d24b785f4bd77beb30793c1a404f78496b4cf74c94af585bad0
 size 5000518480
model-00007-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cddda0170555478b607e02b57cb8af0bdf54ff7d1c6b30a79b15302e7cd8b5ed
+oid sha256:d3b79d2bbf259c2457410d901c7618c0c0a014b16f23469adbefc55c27a2de06
 size 3040999600
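Each shard diff swaps only the LFS sha256 oid; the byte sizes are identical, suggesting the re-quantized shards keep the same tensor layout with different values. A minimal verification sketch for a downloaded shard against its pointer hash (filename and expected digest taken from the first shard above):

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so multi-GB shards need not fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

expected = "98a631b2ca427653803cb56731c6fa15c2cffac5a022cdfd9d5a1582fa15516c"
assert sha256_of("model-00001-of-00007.safetensors") == expected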
recipe.yaml CHANGED
@@ -1,9 +1,10 @@
-default_stage:
-  default_modifiers:
+quant_stage:
+  quant_modifiers:
     AWQModifier:
       config_groups:
         group_0:
-          targets: [Linear]
+          targets: ['re:.*gate_proj.*', 're:.*up_proj.*', 're:.*down_proj.*',
+            're:.*k_proj.*', 're:.*q_proj.*', 're:.*v_proj.*', 're:.*o_proj.*']
           weights:
             num_bits: 8
             type: int
@@ -13,12 +14,14 @@ default_stage:
             block_structure: null
             dynamic: false
             actorder: null
-            observer: minmax
+            observer: mse
             observer_kwargs: {}
             input_activations: null
             output_activations: null
+          format: null
       targets: [Linear]
-      ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
+      ignore: [lm_head, model.embed_tokens, 're:.*input_layernorm$', 're:.*post_attention_layernorm$',
+        model.norm, 're:.*shared_experts.*', 're:.*mlp.gate']
       mappings:
       - smooth_layer: re:.*input_layernorm$
         balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
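The recipe change mirrors config.json: the stage is renamed from default_stage to quant_stage, the AWQ group is scoped to the seven projection matrices, the observer becomes mse, and the ignore list grows to exclude embeddings, norms, shared experts, and the MoE router gates from quantization. A minimal sketch of applying such a recipe with llm-compressor's oneshot entry point; the base model id and calibration dataset are placeholder assumptions, and the exact keyword set may differ across llm-compressor versions:

# Hypothetical invocation; model id and dataset are illustrative only.
from llmcompressor import oneshot

oneshot(
    model="Qwen/BASE-MODEL",   # placeholder for the unquantized base model
    recipe="recipe.yaml",      # the AWQ recipe updated in this commit
    dataset="open_platypus",   # placeholder calibration dataset
    num_calibration_samples=256,
    max_seq_length=2048,
    output_dir="./awq-w8-g32",
)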