Hariprasath28 committed
Commit 0ba16bf · verified · 1 Parent(s): 04a07c4

Upload 8-bit GPTQ quantized Orpheus model

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,58 @@
+ ---
+ license: apache-2.0
+ base_model: svara-tts-v1-gptqmodel
+ tags:
+ - quantized
+ - gptq
+ - text-to-speech
+ - tts
+ - orpheus
+ - 8bit
+ library_name: transformers
+ pipeline_tag: text-generation
+ ---
+
+ # Orpheus 3B 8-bit GPTQ
+
+ This is an 8-bit GPTQ quantized version of [baseten/orpheus-3b-0.1-ft](https://huggingface.co/baseten/orpheus-3b-0.1-ft).
+
+ ## Model Details
+
+ - **Base Model**: svara-tts-v1-gptqmodel
+ - **Quantization**: 8-bit GPTQ
+ - **Group Size**: 128
+ - **Calibration Dataset**: canopylabs/zac-sample-dataset (TTS-specific)
+ - **Library**: auto-gptq
+
+ ## Usage
+
+ ```python
+ import torch
+ from auto_gptq import AutoGPTQForCausalLM
+ from transformers import AutoTokenizer
+
+ # Load the quantized model
+ model = AutoGPTQForCausalLM.from_quantized(
+     "Hariprasath28/svara-tts-v1-gptq",
+     device="cuda:0",  # or "cpu"
+     use_triton=False,
+     trust_remote_code=True
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained("Hariprasath28/svara-tts-v1-gptq", trust_remote_code=True)
+
+ # Generate TTS tokens
+ text = "tara: Hello, this is a test of the quantized Orpheus model."
+ inputs = tokenizer(text, return_tensors="pt").to("cuda:0")
+
+ with torch.no_grad():
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=100,
+         temperature=0.7,
+         do_sample=True
+     )
+
+ generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ print(generated)
+ ```
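The card's usage example targets auto-gptq, but quantize_config.json below records gptqmodel:4.2.5 as the quantizer, and recent transformers releases can load GPTQ checkpoints directly when a GPTQ backend is installed. A minimal alternative loading sketch, assuming the same repo id and an installed backend (gptqmodel or optimum) plus accelerate:

```python
# Alternative load path via plain transformers; assumes a GPTQ backend
# (gptqmodel or optimum) and accelerate are installed, so the
# quantization_config embedded in config.json is picked up automatically.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Hariprasath28/svara-tts-v1-gptq"
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
```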
chat_template.jinja ADDED
@@ -0,0 +1,93 @@
+ {{- bos_token }}
+ {%- if custom_tools is defined %}
+     {%- set tools = custom_tools %}
+ {%- endif %}
+ {%- if not tools_in_user_message is defined %}
+     {%- set tools_in_user_message = true %}
+ {%- endif %}
+ {%- if not date_string is defined %}
+     {%- if strftime_now is defined %}
+         {%- set date_string = strftime_now("%d %b %Y") %}
+     {%- else %}
+         {%- set date_string = "26 Jul 2024" %}
+     {%- endif %}
+ {%- endif %}
+ {%- if not tools is defined %}
+     {%- set tools = none %}
+ {%- endif %}
+
+ {#- This block extracts the system message, so we can slot it into the right place. #}
+ {%- if messages[0]['role'] == 'system' %}
+     {%- set system_message = messages[0]['content']|trim %}
+     {%- set messages = messages[1:] %}
+ {%- else %}
+     {%- set system_message = "" %}
+ {%- endif %}
+
+ {#- System message #}
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+ {%- if tools is not none %}
+     {{- "Environment: ipython\n" }}
+ {%- endif %}
+ {{- "Cutting Knowledge Date: December 2023\n" }}
+ {{- "Today Date: " + date_string + "\n\n" }}
+ {%- if tools is not none and not tools_in_user_message %}
+     {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+     {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+     {{- "Do not use variables.\n\n" }}
+     {%- for t in tools %}
+         {{- t | tojson(indent=4) }}
+         {{- "\n\n" }}
+     {%- endfor %}
+ {%- endif %}
+ {{- system_message }}
+ {{- "<|eot_id|>" }}
+
+ {#- Custom tools are passed in a user message with some extra guidance #}
+ {%- if tools_in_user_message and not tools is none %}
+     {#- Extract the first user message so we can plug it in here #}
+     {%- if messages | length != 0 %}
+         {%- set first_user_message = messages[0]['content']|trim %}
+         {%- set messages = messages[1:] %}
+     {%- else %}
+         {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+     {%- endif %}
+     {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+     {{- "Given the following functions, please respond with a JSON for a function call " }}
+     {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+     {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+     {{- "Do not use variables.\n\n" }}
+     {%- for t in tools %}
+         {{- t | tojson(indent=4) }}
+         {{- "\n\n" }}
+     {%- endfor %}
+     {{- first_user_message + "<|eot_id|>"}}
+ {%- endif %}
+
+ {%- for message in messages %}
+     {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+         {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+     {%- elif 'tool_calls' in message %}
+         {%- if not message.tool_calls|length == 1 %}
+             {{- raise_exception("This model only supports single tool-calls at once!") }}
+         {%- endif %}
+         {%- set tool_call = message.tool_calls[0].function %}
+         {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+         {{- '{"name": "' + tool_call.name + '", ' }}
+         {{- '"parameters": ' }}
+         {{- tool_call.arguments | tojson }}
+         {{- "}" }}
+         {{- "<|eot_id|>" }}
+     {%- elif message.role == "tool" or message.role == "ipython" %}
+         {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+         {%- if message.content is mapping or message.content is iterable %}
+             {{- message.content | tojson }}
+         {%- else %}
+             {{- message.content }}
+         {%- endif %}
+         {{- "<|eot_id|>" }}
+     {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+     {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+ {%- endif %}
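The template above is the Llama 3.1 chat format inherited from the base model. For reference, a short sketch of rendering a prompt through it with transformers; the message content is an invented example, and note the README's usage path tokenizes plain text directly rather than going through the chat template:

```python
# Render a message list through the bundled chat template (illustrative only).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Hariprasath28/svara-tts-v1-gptq")
messages = [{"role": "user", "content": "tara: Hello there."}]  # invented example message
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # shows the <|start_header_id|>...<|eot_id|> framing produced by the template above
```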
config.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "dtype": "bfloat16",
+   "eos_token_id": 128009,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 3072,
+   "initializer_range": 0.02,
+   "intermediate_size": 8192,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 8,
+   "pad_token_id": 128263,
+   "pretraining_tp": 1,
+   "quantization_config": {
+     "bits": 4,
+     "checkpoint_format": "gptq",
+     "desc_act": false,
+     "group_size": 128,
+     "lm_head": false,
+     "meta": {
+       "act_group_aware": false,
+       "damp_auto_increment": 0.01,
+       "damp_percent": 0.01,
+       "mse": 0.0,
+       "quantizer": [
+         "gptqmodel:4.2.5"
+       ],
+       "static_groups": false,
+       "true_sequential": true,
+       "uri": "https://github.com/modelcloud/gptqmodel",
+       "v2": false,
+       "v2_alpha": 0.25
+     },
+     "pack_dtype": "int32",
+     "quant_method": "gptq",
+     "sym": true
+   },
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 32.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": true,
+   "transformers_version": "4.57.1",
+   "unsloth_version": "2025.10.4",
+   "use_cache": true,
+   "vocab_size": 156940
+ }
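The quantization_config block embedded here is what loaders read at runtime. A quick sketch for inspecting it locally, assuming config.json has been downloaded alongside the weights:

```python
# Print the GPTQ settings recorded in config.json.
import json

with open("config.json") as f:
    cfg = json.load(f)

q = cfg["quantization_config"]
print(q["quant_method"], q["bits"], "bit, group_size", q["group_size"], "sym", q["sym"])
```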
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 128000,
+   "eos_token_id": 128009,
+   "pad_token_id": 128263,
+   "transformers_version": "4.57.1"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0772c922098a8fdb8941899724fb3c8e96238884eee66c666fbba1443365f05
+ size 2432001824
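This is only the Git LFS pointer; the ~2.4 GB weights live in LFS storage. A small sketch for verifying a locally downloaded copy against the pointer's size and sha256 (the local path is an assumption):

```python
# Verify a downloaded model.safetensors against the LFS pointer above.
import hashlib
import os

path = "model.safetensors"  # assumed local download path
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(os.path.getsize(path) == 2432001824)
print(h.hexdigest() == "a0772c922098a8fdb8941899724fb3c8e96238884eee66c666fbba1443365f05")
```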
quant_log.csv ADDED
@@ -0,0 +1,197 @@
+ layer,module,loss,samples,damp,time
+ 0,self_attn.k_proj,0.0007165673,0.01000,1.079
+ 0,self_attn.v_proj,0.0000596307,0.01000,1.078
+ 0,self_attn.q_proj,0.0013268112,0.01000,1.075
+ 0,self_attn.o_proj,0.0000097104,0.01000,1.126
+ 0,mlp.gate_proj,0.0014320708,0.01000,1.095
+ 0,mlp.up_proj,0.0013744390,0.01000,1.092
+ 0,mlp.down_proj,0.0032274312,0.01000,3.047
+ 1,self_attn.k_proj,0.0036077098,0.01000,1.082
+ 1,self_attn.v_proj,0.0008333813,0.01000,1.075
+ 1,self_attn.q_proj,0.0073467204,0.01000,1.086
+ 1,self_attn.o_proj,0.0000822506,0.01000,1.119
+ 1,mlp.gate_proj,0.0033585912,0.01000,1.097
+ 1,mlp.up_proj,0.0031722694,0.01000,1.080
+ 1,mlp.down_proj,3.9980311538,0.01000,3.069
+ 2,self_attn.k_proj,0.0043991325,0.01000,1.084
+ 2,self_attn.v_proj,0.0012270709,0.01000,1.087
+ 2,self_attn.q_proj,0.0091787712,0.01000,1.140
+ 2,self_attn.o_proj,0.0001391864,0.01000,1.093
+ 2,mlp.gate_proj,0.0041719070,0.01000,1.108
+ 2,mlp.up_proj,0.0037999180,0.01000,1.104
+ 2,mlp.down_proj,0.0002487536,0.01000,3.029
+ 3,self_attn.k_proj,0.0020995598,0.01000,1.099
+ 3,self_attn.v_proj,0.0007532771,0.01000,1.132
+ 3,self_attn.q_proj,0.0045231522,0.01000,1.081
+ 3,self_attn.o_proj,0.0001802197,0.01000,1.108
+ 3,mlp.gate_proj,0.0055153234,0.01000,1.107
+ 3,mlp.up_proj,0.0048551054,0.01000,1.117
+ 3,mlp.down_proj,0.0003586192,0.01000,3.002
+ 4,self_attn.k_proj,0.0022233009,0.01000,1.147
+ 4,self_attn.v_proj,0.0008471522,0.01000,1.105
+ 4,self_attn.q_proj,0.0048226416,0.01000,1.096
+ 4,self_attn.o_proj,0.0002814944,0.01000,1.096
+ 4,mlp.gate_proj,0.0058206076,0.01000,1.112
+ 4,mlp.up_proj,0.0048891712,0.01000,1.157
+ 4,mlp.down_proj,0.0004396601,0.01000,3.006
+ 5,self_attn.k_proj,0.0035945541,0.01000,1.136
+ 5,self_attn.v_proj,0.0010127633,0.01000,1.087
+ 5,self_attn.q_proj,0.0069930129,0.01000,1.107
+ 5,self_attn.o_proj,0.0002054535,0.01000,1.112
+ 5,mlp.gate_proj,0.0072308896,0.01000,1.146
+ 5,mlp.up_proj,0.0062122815,0.01000,1.105
+ 5,mlp.down_proj,0.0004650744,0.01000,2.999
+ 6,self_attn.k_proj,0.0027636019,0.01000,1.087
+ 6,self_attn.v_proj,0.0008667380,0.01000,1.093
+ 6,self_attn.q_proj,0.0054596979,0.01000,1.100
+ 6,self_attn.o_proj,0.0004086048,0.01000,1.147
+ 6,mlp.gate_proj,0.0080465792,0.01000,1.119
+ 6,mlp.up_proj,0.0069164634,0.01000,1.120
+ 6,mlp.down_proj,0.0003812077,0.01000,3.064
+ 7,self_attn.k_proj,0.0027822383,0.01000,1.095
+ 7,self_attn.v_proj,0.0008612974,0.01000,1.094
+ 7,self_attn.q_proj,0.0053715444,0.01000,1.136
+ 7,self_attn.o_proj,0.0002576188,0.01000,1.110
+ 7,mlp.gate_proj,0.0085338670,0.01000,1.108
+ 7,mlp.up_proj,0.0077248947,0.01000,1.114
+ 7,mlp.down_proj,0.0005805181,0.01000,3.090
+ 8,self_attn.k_proj,0.0029828666,0.01000,1.105
+ 8,self_attn.v_proj,0.0009772369,0.01000,1.090
+ 8,self_attn.q_proj,0.0059334094,0.01000,1.137
+ 8,self_attn.o_proj,0.0003355184,0.01000,1.095
+ 8,mlp.gate_proj,0.0087882949,0.01000,1.107
+ 8,mlp.up_proj,0.0079226494,0.01000,1.120
+ 8,mlp.down_proj,0.0006587114,0.01000,3.024
+ 9,self_attn.k_proj,0.0024128832,0.01000,1.096
+ 9,self_attn.v_proj,0.0009061856,0.01000,1.119
+ 9,self_attn.q_proj,0.0055490041,0.01000,1.082
+ 9,self_attn.o_proj,0.0003017767,0.01000,1.097
+ 9,mlp.gate_proj,0.0082731644,0.01000,1.104
+ 9,mlp.up_proj,0.0075000601,0.01000,1.125
+ 9,mlp.down_proj,0.0007202323,0.01000,3.002
+ 10,self_attn.k_proj,0.0027228307,0.01000,1.134
+ 10,self_attn.v_proj,0.0009034530,0.01000,1.091
+ 10,self_attn.q_proj,0.0053220509,0.01000,1.113
+ 10,self_attn.o_proj,0.0001987794,0.01000,1.097
+ 10,mlp.gate_proj,0.0086941755,0.01000,1.109
+ 10,mlp.up_proj,0.0082439293,0.01000,1.150
+ 10,mlp.down_proj,0.0019673965,0.01000,2.997
+ 11,self_attn.k_proj,0.0023969931,0.01000,1.086
+ 11,self_attn.v_proj,0.0010096786,0.01000,1.078
+ 11,self_attn.q_proj,0.0049915458,0.01000,1.085
+ 11,self_attn.o_proj,0.0005560061,0.01000,1.099
+ 11,mlp.gate_proj,0.0107198213,0.01000,1.133
+ 11,mlp.up_proj,0.0102804375,0.01000,1.102
+ 11,mlp.down_proj,0.0011271423,0.01000,3.014
+ 12,self_attn.k_proj,0.0029296636,0.01000,1.076
+ 12,self_attn.v_proj,0.0012349977,0.01000,1.095
+ 12,self_attn.q_proj,0.0063043941,0.01000,1.087
+ 12,self_attn.o_proj,0.0007200323,0.01000,1.151
+ 12,mlp.gate_proj,0.0116617102,0.01000,1.107
+ 12,mlp.up_proj,0.0111292980,0.01000,1.111
+ 12,mlp.down_proj,0.0014334196,0.01000,3.021
+ 13,self_attn.k_proj,0.0035808150,0.01000,1.085
+ 13,self_attn.v_proj,0.0015270074,0.01000,1.086
+ 13,self_attn.q_proj,0.0074143148,0.01000,1.129
+ 13,self_attn.o_proj,0.0009024844,0.01000,1.096
+ 13,mlp.gate_proj,0.0145458823,0.01000,1.102
+ 13,mlp.up_proj,0.0133621042,0.01000,1.111
+ 13,mlp.down_proj,0.0023988670,0.01000,3.040
+ 14,self_attn.k_proj,0.0031979346,0.01000,1.101
+ 14,self_attn.v_proj,0.0019720971,0.01000,1.089
+ 14,self_attn.q_proj,0.0079760299,0.01000,1.134
+ 14,self_attn.o_proj,0.0015582386,0.01000,1.103
+ 14,mlp.gate_proj,0.0180456819,0.01000,1.109
+ 14,mlp.up_proj,0.0168532025,0.01000,1.135
+ 14,mlp.down_proj,0.0041922475,0.01000,3.065
+ 15,self_attn.k_proj,0.0036405652,0.01000,1.107
+ 15,self_attn.v_proj,0.0018848111,0.01000,1.147
+ 15,self_attn.q_proj,0.0086390972,0.01000,1.087
+ 15,self_attn.o_proj,0.0015424666,0.01000,1.088
+ 15,mlp.gate_proj,0.0214356906,0.01000,1.101
+ 15,mlp.up_proj,0.0195761467,0.01000,1.138
+ 15,mlp.down_proj,0.0055152711,0.01000,3.005
+ 16,self_attn.k_proj,0.0045111026,0.01000,1.142
+ 16,self_attn.v_proj,0.0023171834,0.01000,1.084
+ 16,self_attn.q_proj,0.0101644966,0.01000,1.099
+ 16,self_attn.o_proj,0.0011662681,0.01000,1.091
+ 16,mlp.gate_proj,0.0213560588,0.01000,1.135
+ 16,mlp.up_proj,0.0195173964,0.01000,1.118
+ 16,mlp.down_proj,0.0054034785,0.01000,3.009
+ 17,self_attn.k_proj,0.0044137718,0.01000,1.101
+ 17,self_attn.v_proj,0.0023232712,0.01000,1.096
+ 17,self_attn.q_proj,0.0101262539,0.01000,1.098
+ 17,self_attn.o_proj,0.0008717865,0.01000,1.113
+ 17,mlp.gate_proj,0.0211382581,0.01000,1.144
+ 17,mlp.up_proj,0.0194139481,0.01000,1.109
+ 17,mlp.down_proj,0.0061121312,0.01000,3.061
+ 18,self_attn.k_proj,0.0048886443,0.01000,1.094
+ 18,self_attn.v_proj,0.0027045659,0.01000,1.089
+ 18,self_attn.q_proj,0.0109312589,0.01000,1.094
+ 18,self_attn.o_proj,0.0008918150,0.01000,1.112
+ 18,mlp.gate_proj,0.0234650211,0.01000,1.108
+ 18,mlp.up_proj,0.0218645099,0.01000,1.110
+ 18,mlp.down_proj,0.0086717100,0.01000,3.046
+ 19,self_attn.k_proj,0.0044884081,0.01000,1.100
+ 19,self_attn.v_proj,0.0030410163,0.01000,1.093
+ 19,self_attn.q_proj,0.0110051451,0.01000,1.139
+ 19,self_attn.o_proj,0.0020651088,0.01000,1.100
+ 19,mlp.gate_proj,0.0245785045,0.01000,1.109
+ 19,mlp.up_proj,0.0234034874,0.01000,1.119
+ 19,mlp.down_proj,0.0092809318,0.01000,3.034
+ 20,self_attn.k_proj,0.0050302518,0.01000,1.081
+ 20,self_attn.v_proj,0.0034087377,0.01000,1.153
+ 20,self_attn.q_proj,0.0111182124,0.01000,1.085
+ 20,self_attn.o_proj,0.0028834537,0.01000,1.091
+ 20,mlp.gate_proj,0.0262545438,0.01000,1.120
+ 20,mlp.up_proj,0.0257513053,0.01000,1.103
+ 20,mlp.down_proj,0.0104322885,0.01000,3.031
+ 21,self_attn.k_proj,0.0051919348,0.01000,1.126
+ 21,self_attn.v_proj,0.0042079091,0.01000,1.121
+ 21,self_attn.q_proj,0.0125673522,0.01000,1.102
+ 21,self_attn.o_proj,0.0020777454,0.01000,1.096
+ 21,mlp.gate_proj,0.0278684226,0.01000,1.119
+ 21,mlp.up_proj,0.0270644968,0.01000,1.142
+ 21,mlp.down_proj,0.0108675939,0.01000,3.004
+ 22,self_attn.k_proj,0.0049076396,0.01000,1.137
+ 22,self_attn.v_proj,0.0043899354,0.01000,1.098
+ 22,self_attn.q_proj,0.0115766209,0.01000,1.093
+ 22,self_attn.o_proj,0.0016416336,0.01000,1.090
+ 22,mlp.gate_proj,0.0266865456,0.01000,1.148
+ 22,mlp.up_proj,0.0263432138,0.01000,1.125
+ 22,mlp.down_proj,0.0106157281,0.01000,3.008
+ 23,self_attn.k_proj,0.0050228717,0.01000,1.102
+ 23,self_attn.v_proj,0.0052046320,0.01000,1.092
+ 23,self_attn.q_proj,0.0136195882,0.01000,1.095
+ 23,self_attn.o_proj,0.0022992036,0.01000,1.139
+ 23,mlp.gate_proj,0.0328759497,0.01000,1.114
+ 23,mlp.up_proj,0.0312380646,0.01000,1.104
+ 23,mlp.down_proj,0.0120926987,0.01000,3.006
+ 24,self_attn.k_proj,0.0053366215,0.01000,1.104
+ 24,self_attn.v_proj,0.0058682263,0.01000,1.092
+ 24,self_attn.q_proj,0.0130573710,0.01000,1.089
+ 24,self_attn.o_proj,0.0041216169,0.01000,1.144
+ 24,mlp.gate_proj,0.0386738813,0.01000,1.134
+ 24,mlp.up_proj,0.0368005651,0.01000,1.131
+ 24,mlp.down_proj,0.0130093956,0.01000,3.077
+ 25,self_attn.k_proj,0.0049963995,0.01000,1.103
+ 25,self_attn.v_proj,0.0055508415,0.01000,1.104
+ 25,self_attn.q_proj,0.0166119861,0.01000,1.177
+ 25,self_attn.o_proj,0.0073872493,0.01000,1.132
+ 25,mlp.gate_proj,0.0591742414,0.01000,1.121
+ 25,mlp.up_proj,0.0547565439,0.01000,1.119
+ 25,mlp.down_proj,0.0167801850,0.01000,3.042
+ 26,self_attn.k_proj,0.0047088243,0.01000,1.104
+ 26,self_attn.v_proj,0.0059495622,0.01000,1.148
+ 26,self_attn.q_proj,0.0157712409,0.01000,1.094
+ 26,self_attn.o_proj,0.0089129083,0.01000,1.101
+ 26,mlp.gate_proj,0.0934302301,0.01000,1.115
+ 26,mlp.up_proj,0.0831171238,0.01000,1.161
+ 26,mlp.down_proj,0.0334263968,0.01000,3.024
+ 27,self_attn.k_proj,0.0035992840,0.01000,1.138
+ 27,self_attn.v_proj,0.0043383457,0.01000,1.100
+ 27,self_attn.q_proj,0.0133404605,0.01000,1.104
+ 27,self_attn.o_proj,0.0147618290,0.01000,1.102
+ 27,mlp.gate_proj,0.1286905029,0.01000,1.138
+ 27,mlp.up_proj,0.1280683315,0.01000,1.127
+ 27,mlp.down_proj,0.7720242125,0.01000,3.055
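The loss column makes the outliers easy to spot: mlp.down_proj in layers 1 and 27 sit well above every other module. A small sketch for summarizing the log, assuming quant_log.csv is downloaded locally; only the layer, module, and loss columns are read:

```python
# Print the five modules with the highest recorded quantization loss.
import csv

rows = []
with open("quant_log.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for row in reader:
        rows.append((f"layer {row[0]} {row[1]}", float(row[2])))

for name, loss in sorted(rows, key=lambda r: r[1], reverse=True)[:5]:
    print(f"{name}: {loss:.6f}")
```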
quantize_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bits": 4,
+   "group_size": 128,
+   "desc_act": false,
+   "sym": true,
+   "lm_head": false,
+   "quant_method": "gptq",
+   "checkpoint_format": "gptq",
+   "pack_dtype": "int32",
+   "meta": {
+     "quantizer": [
+       "gptqmodel:4.2.5"
+     ],
+     "uri": "https://github.com/modelcloud/gptqmodel",
+     "damp_percent": 0.01,
+     "damp_auto_increment": 0.01,
+     "static_groups": false,
+     "true_sequential": true,
+     "mse": 0.0,
+     "v2": false,
+     "v2_alpha": 0.25,
+     "act_group_aware": false
+   }
+ }
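These are the settings consumed by GPTQModel, the quantizer named in the meta block. A rough reproduction sketch under that assumption; the calibration texts and output path are placeholders, and exact call signatures may vary across gptqmodel versions:

```python
# Rough sketch of producing a comparable GPTQ checkpoint with GPTQModel.
# Calibration data and output path are placeholders, not the recipe actually used here.
from gptqmodel import GPTQModel, QuantizeConfig

quant_config = QuantizeConfig(bits=4, group_size=128, desc_act=False, sym=True)
model = GPTQModel.load("baseten/orpheus-3b-0.1-ft", quant_config)

calibration = ["tara: Hello, this is a calibration sentence."]  # placeholder texts
model.quantize(calibration)
model.save("svara-tts-v1-gptq-out")
```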
special_tokens_map.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "additional_special_tokens": [
+     "<|audio|>"
+   ],
+   "bos_token": {
+     "content": "<|begin_of_text|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|eot_id|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<custom_token_7>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
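A quick consistency check of these tokens against the IDs declared in config.json and generation_config.json, assuming the tokenizer loads from this repo:

```python
# Cross-check the special tokens above against the IDs recorded in config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Hariprasath28/svara-tts-v1-gptq")
print(tok.bos_token, tok.bos_token_id)  # config.json declares bos_token_id 128000
print(tok.eos_token, tok.eos_token_id)  # config.json declares eos_token_id 128009
print(tok.pad_token, tok.pad_token_id)  # config.json declares pad_token_id 128263
```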
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:044e2a10201774018db120391980464472baabf223bd353cea49b17da0b66abc
+ size 22849546
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff