Commit 380ec50 (verified) by cpatonn
Parent(s): 97f902f

Upload folder using huggingface_hub

config.json CHANGED
@@ -5,6 +5,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "decoder_sparse_step": 1,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -27,10 +28,17 @@
   "quantization_config": {
     "config_groups": {
       "group_0": {
+        "format": "pack-quantized",
         "input_activations": null,
         "output_activations": null,
         "targets": [
-          "Linear"
+          "re:.*gate_proj.*",
+          "re:.*up_proj.*",
+          "re:.*down_proj.*",
+          "re:.*k_proj.*",
+          "re:.*q_proj.*",
+          "re:.*v_proj.*",
+          "re:.*o_proj.*"
         ],
         "weights": {
           "actorder": null,
@@ -38,7 +46,7 @@
           "dynamic": false,
           "group_size": 32,
           "num_bits": 8,
-          "observer": "minmax",
+          "observer": "mse",
           "observer_kwargs": {},
           "strategy": "group",
           "symmetric": true,
@@ -101,7 +109,10 @@
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
+    "quantization_status": "compressed",
+    "sparsity_config": {},
+    "transform_config": {},
+    "version": "0.10.3.dev47+ge463fe6"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -110,8 +121,7 @@
   "shared_expert_intermediate_size": 0,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.0",
+  "transformers_version": "4.56.0.dev0",
   "use_cache": true,
   "use_qk_norm": true,
   "use_sliding_window": false,
generation_config.json CHANGED
@@ -9,5 +9,5 @@
   "temperature": 0.7,
   "top_k": 20,
   "top_p": 0.8,
-  "transformers_version": "4.55.0"
+  "transformers_version": "4.56.0.dev0"
 }
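Only the transformers_version stamp changes here; the sampling defaults (temperature 0.7, top_k 20, top_p 0.8) carry over unchanged. A usage sketch passing those defaults explicitly, reusing model and tokenizer from the loading sketch above:

inputs = tokenizer("Give me a short introduction to large language models.",
                   return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.7,  # values mirror generation_config.json
    top_k=20,
    top_p=0.8,
    max_new_tokens=256,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))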
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8f47b5f1df9e0400882a41bb091e353311bd0d32073f8c8380b176b900df160
+oid sha256:98a631b2ca427653803cb56731c6fa15c2cffac5a022cdfd9d5a1582fa15516c
 size 5000483312
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b85d385cb604730361b3140df95c0d9a214b6ef1d49b7eeead7fdf8965d7145e
+oid sha256:05552b1f2d2e6df1c7d68799067f2f00b27b6bb89f1a5c63515188541128e897
 size 5000514520
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa91412ac519b2d9f7b47ee22f6934bd1411dcd764d4a3fcb60da3048405082d
+oid sha256:52c2238e8fcb78fc7640bf2cc8d570d0d994f995535035330cc6c6f0a107de02
 size 4999987936
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de7d38892ae8716c2db64de348f458ad8e29e1c051a1cfe2dc7b56f66e7a24b3
+oid sha256:adcfee886dee742f9a759d78ca9085570e4f10b2dc89f114d68e9f18c61105b5
 size 5000518496
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7845edf2d7a924b61c8e2a68dd45db89ffe4e22824a9ec2656853d9fa1fcb15
+oid sha256:5a3e3635a9ded02f0df27e91d609e85d76e036684079ccebeeab6dd233d5c924
 size 4999987952
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8b7adfd181d9b4579aa98756d7d480baa0413bc79b105643b6b09025ac22122
+oid sha256:36aa4b438cd05d24b785f4bd77beb30793c1a404f78496b4cf74c94af585bad0
 size 5000518480
model-00007-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cddda0170555478b607e02b57cb8af0bdf54ff7d1c6b30a79b15302e7cd8b5ed
+oid sha256:d3b79d2bbf259c2457410d901c7618c0c0a014b16f23469adbefc55c27a2de06
 size 3040999600
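Each shard diff swaps only the LFS sha256 oid; the byte sizes are identical, suggesting the re-quantized shards keep the same tensor layout with different values. A minimal verification sketch for a downloaded shard against its pointer hash (filename and expected digest taken from the first shard above):

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so multi-GB shards need not fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

expected = "98a631b2ca427653803cb56731c6fa15c2cffac5a022cdfd9d5a1582fa15516c"
assert sha256_of("model-00001-of-00007.safetensors") == expected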
recipe.yaml CHANGED
@@ -1,9 +1,10 @@
-default_stage:
-  default_modifiers:
+quant_stage:
+  quant_modifiers:
     AWQModifier:
       config_groups:
         group_0:
-          targets: [Linear]
+          targets: ['re:.*gate_proj.*', 're:.*up_proj.*', 're:.*down_proj.*',
+            're:.*k_proj.*', 're:.*q_proj.*', 're:.*v_proj.*', 're:.*o_proj.*']
           weights:
             num_bits: 8
             type: int
@@ -13,12 +14,14 @@ default_stage:
             block_structure: null
             dynamic: false
             actorder: null
-            observer: minmax
+            observer: mse
             observer_kwargs: {}
             input_activations: null
             output_activations: null
+          format: null
       targets: [Linear]
-      ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
+      ignore: [lm_head, model.embed_tokens, 're:.*input_layernorm$', 're:.*post_attention_layernorm$',
+        model.norm, 're:.*shared_experts.*', 're:.*mlp.gate']
       mappings:
       - smooth_layer: re:.*input_layernorm$
         balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
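The recipe change mirrors config.json: the stage is renamed from default_stage to quant_stage, the AWQ group is scoped to the seven projection matrices, the observer becomes mse, and the ignore list grows to exclude embeddings, norms, shared experts, and the MoE router gates from quantization. A minimal sketch of applying such a recipe with llm-compressor's oneshot entry point; the base model id and calibration dataset are placeholder assumptions, and the exact keyword set may differ across llm-compressor versions:

# Hypothetical invocation; model id and dataset are illustrative only.
from llmcompressor import oneshot

oneshot(
    model="Qwen/BASE-MODEL",   # placeholder for the unquantized base model
    recipe="recipe.yaml",      # the AWQ recipe updated in this commit
    dataset="open_platypus",   # placeholder calibration dataset
    num_calibration_samples=256,
    max_seq_length=2048,
    output_dir="./awq-w8-g32",
)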