Upload folder using huggingface_hub
- README.md +3 -1
- config.json +1 -1
- model.safetensors +1 -1
- quantize_config.json +1 -1
README.md
CHANGED
@@ -17,6 +17,8 @@ base_model_relation: quantized
 Base model: [Qwen/Qwen3-4B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507)

 <i>This model is quantized to 4-bit with a group size of 128.</i>
+<br>
+<i>Compared to earlier quantized versions, the new quantized model demonstrates better tokens/s efficiency. This improvement comes from setting desc_act=False in the quantization configuration.</i>

 ```
 vllm serve JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4
@@ -261,4 +263,4 @@ If you find our work helpful, feel free to give us a cite.
     primaryClass={cs.CL},
     url={https://arxiv.org/abs/2505.09388},
 }
-```
+```
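The README's serving instructions come down to `vllm serve JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4`, which starts an OpenAI-compatible HTTP server. As a minimal client sketch, assuming vLLM's default port 8000 and the `openai` Python package (neither is stated in this commit):

```python
# Minimal client sketch for the server started by:
#   vllm serve JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4
# Assumes the default vLLM port (8000) and the openai client package.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4",
    messages=[{"role": "user", "content": "Briefly explain GPTQ 4-bit quantization."}],
    max_tokens=256,
)
print(response.choices[0].message.content)
```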
config.json
CHANGED
@@ -58,7 +58,7 @@
   "quantization_config": {
     "bits": 4,
     "checkpoint_format": "gptq",
-    "desc_act": true,
+    "desc_act": false,
     "group_size": 128,
     "hyb_act": false,
     "lm_head": false,
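The only functional change in config.json is the embedded quantization_config, which is what transformers reads when loading the checkpoint. A minimal loading sketch, assuming a CUDA GPU and an installed GPTQ backend (e.g. gptqmodel or auto-gptq), neither of which this commit specifies:

```python
# Minimal sketch: loading the GPTQ checkpoint with transformers, which picks up
# the "quantization_config" block from config.json shown above.
# Assumes a CUDA GPU plus a GPTQ kernel backend (gptqmodel or auto-gptq) is installed.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "JunHowie/Qwen3-4B-Thinking-2507-GPTQ-Int4"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

inputs = tokenizer("Explain GPTQ in one sentence.", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```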
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7fbf018a7cb2568b6ac95cbf54cf629e1ca8923a4722aad8211065f11226de5e
 size 2669888648
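model.safetensors in the repository is a Git LFS pointer file (version, sha256 oid, byte size); this commit swaps the weights, so the oid changes while the size stays 2669888648 bytes. A small sketch for checking a locally downloaded copy against the new pointer, where the local path is an assumption:

```python
# Minimal sketch: verify a locally downloaded model.safetensors against the
# Git LFS pointer's sha256 oid and byte size. The local path is hypothetical.
import hashlib
import os

path = "model.safetensors"  # assumed local download location
expected_oid = "7fbf018a7cb2568b6ac95cbf54cf629e1ca8923a4722aad8211065f11226de5e"
expected_size = 2669888648

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer oid and size match")
```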
quantize_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "bits": 4,
   "group_size": 128,
-  "desc_act": true,
+  "desc_act": false,
   "hyb_act": false,
   "sym": true,
   "lm_head": false,
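quantize_config.json mirrors the config.json change: desc_act flips from true to false, which the README credits for the better tokens/s. For illustration only, a hypothetical sketch of a 4-bit GPTQ quantization run that would emit a comparable config; the AutoGPTQ library choice, the calibration text, and support for this exact base model are assumptions, not stated in this commit:

```python
# Hypothetical sketch of a 4-bit GPTQ quantization with desc_act=False.
# Illustrative only; the actual tool used for this checkpoint is not stated in the commit.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

base_id = "Qwen/Qwen3-4B-Thinking-2507"
tokenizer = AutoTokenizer.from_pretrained(base_id)

quantize_config = BaseQuantizeConfig(
    bits=4,          # 4-bit weights, as in quantize_config.json
    group_size=128,  # group size 128
    desc_act=False,  # the setting this commit highlights for better tokens/s
    sym=True,
)

# A tiny calibration set; real runs use a few hundred representative samples.
examples = [tokenizer("GPTQ calibration sample text.")]

model = AutoGPTQForCausalLM.from_pretrained(base_id, quantize_config)
model.quantize(examples)
model.save_quantized("Qwen3-4B-Thinking-2507-GPTQ-Int4")
```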