JunHowie committed
Commit 0c89011 · verified · 1 Parent(s): 05a155f

Upload folder using huggingface_hub

Files changed (4)
  1. README.md +3 -1
  2. config.json +1 -1
  3. model.safetensors +1 -1
  4. quantize_config.json +1 -1
README.md CHANGED

@@ -17,6 +17,8 @@ base_model_relation: quantized
 Base model: [Qwen/Qwen3-4B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507)
 
 <i>This model is quantized to 4-bit with a group size of 128.</i>
+<br>
+<i>Compared to earlier quantized versions, the new quantized model demonstrates better tokens/s efficiency. This improvement comes from setting desc_act=False in the quantization configuration.</i>
 
 ```
 vllm serve JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4
@@ -261,4 +263,4 @@ If you find our work helpful, feel free to give us a cite.
       primaryClass={cs.CL},
       url={https://arxiv.org/abs/2505.09388},
 }
-```
+```
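The README quickstart above serves the model with vLLM. As an illustration only (not part of this commit), the sketch below queries such a server through vLLM's OpenAI-compatible API; the localhost address, port 8000, and the prompt are assumptions, not taken from the repository.

```python
# Illustrative sketch: query a model started with
#   vllm serve JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4
# vLLM exposes an OpenAI-compatible endpoint; host/port are assumed defaults.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4",
    messages=[{"role": "user", "content": "Briefly explain GPTQ quantization."}],
)
print(response.choices[0].message.content)
```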
config.json CHANGED

@@ -58,7 +58,7 @@
   "quantization_config": {
     "bits": 4,
     "checkpoint_format": "gptq",
-    "desc_act": true,
+    "desc_act": false,
     "group_size": 128,
     "hyb_act": false,
     "lm_head": false,
model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96a9996ce410451bbbdb0038efc9cc882c14c7192ec9065020b3d194b9c42e91
+oid sha256:7fbf018a7cb2568b6ac95cbf54cf629e1ca8923a4722aad8211065f11226de5e
 size 2669888648
quantize_config.json CHANGED

@@ -1,7 +1,7 @@
 {
   "bits": 4,
   "group_size": 128,
-  "desc_act": true,
+  "desc_act": false,
   "hyb_act": false,
   "sym": true,
   "lm_head": false,