rombodawg committed on
Commit 34b9263 · verified · 0 Parent(s)

Duplicate from Replete-AI/Adapter_For_Replete-Coder-Qwen2-1.5b
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,243 @@
+ ---
+ license: apache-2.0
+ base_model: Qwen/Qwen2-1.5B
+ tags:
+ - text-generation-inference
+ - transformers
+ - unsloth
+ - qwen2
+ datasets:
+ - Replete-AI/code_bagel_hermes-2.5
+ - Replete-AI/code_bagel
+ - Replete-AI/OpenHermes-2.5-Uncensored
+ - teknium/OpenHermes-2.5
+ - layoric/tiny-codes-alpaca
+ - glaiveai/glaive-code-assistant-v3
+ - ajibawa-2023/Code-290k-ShareGPT
+ - TIGER-Lab/MathInstruct
+ - chargoddard/commitpack-ft-instruct-rated
+ - iamturun/code_instructions_120k_alpaca
+ - ise-uiuc/Magicoder-Evol-Instruct-110K
+ - cognitivecomputations/dolphin-coder
+ - nickrosh/Evol-Instruct-Code-80k-v1
+ - coseal/CodeUltraFeedback_binarized
+ - glaiveai/glaive-function-calling-v2
+ - CyberNative/Code_Vulnerability_Security_DPO
+ - jondurbin/airoboros-2.2
+ - camel-ai
+ - lmsys/lmsys-chat-1m
+ - CollectiveCognition/chats-data-2023-09-22
+ - CoT-Alpaca-GPT4
+ - WizardLM/WizardLM_evol_instruct_70k
+ - WizardLM/WizardLM_evol_instruct_V2_196k
+ - teknium/GPT4-LLM-Cleaned
+ - GPTeacher
+ - OpenGPT
+ - meta-math/MetaMathQA
+ - Open-Orca/SlimOrca
+ - garage-bAInd/Open-Platypus
+ - anon8231489123/ShareGPT_Vicuna_unfiltered
+ - Unnatural-Instructions-GPT4
+ model-index:
+ - name: Replete-Coder-llama3-8b
+   results:
+   - task:
+       name: HumanEval
+       type: text-generation
+     dataset:
+       type: openai_humaneval
+       name: HumanEval
+     metrics:
+     - name: pass@1
+       type: pass@1
+       value:
+       verified: false
+   - task:
+       name: AI2 Reasoning Challenge
+       type: text-generation
+     dataset:
+       name: AI2 Reasoning Challenge (25-Shot)
+       type: ai2_arc
+       config: ARC-Challenge
+       split: test
+       args:
+         num_few_shot: 25
+     metrics:
+     - type: accuracy
+       value:
+       name: normalized accuracy
+     source:
+       url: https://www.placeholderurl.com
+       name: Open LLM Leaderboard
+   - task:
+       name: Text Generation
+       type: text-generation
+     dataset:
+       name: HellaSwag (10-Shot)
+       type: hellaswag
+       split: validation
+       args:
+         num_few_shot: 10
+     metrics:
+     - type: accuracy
+       value:
+       name: normalized accuracy
+     source:
+       url: https://www.placeholderurl.com
+       name: Open LLM Leaderboard
+   - task:
+       name: Text Generation
+       type: text-generation
+     dataset:
+       name: MMLU (5-Shot)
+       type: cais/mmlu
+       config: all
+       split: test
+       args:
+         num_few_shot: 5
+     metrics:
+     - type: accuracy
+       value:
+       name: accuracy
+     source:
+       url: https://www.placeholderurl.com
+       name: Open LLM Leaderboard
+   - task:
+       name: Text Generation
+       type: text-generation
+     dataset:
+       name: TruthfulQA (0-shot)
+       type: truthful_qa
+       config: multiple_choice
+       split: validation
+       args:
+         num_few_shot: 0
+     metrics:
+     - type: multiple_choice_accuracy
+       value:
+     source:
+       url: https://www.placeholderurl.com
+       name: Open LLM Leaderboard
+   - task:
+       name: Text Generation
+       type: text-generation
+     dataset:
+       name: Winogrande (5-shot)
+       type: winogrande
+       config: winogrande_xl
+       split: validation
+       args:
+         num_few_shot: 5
+     metrics:
+     - type: accuracy
+       value:
+       name: accuracy
+     source:
+       url: https://www.placeholderurl.com
+       name: Open LLM Leaderboard
+   - task:
+       name: Text Generation
+       type: text-generation
+     dataset:
+       name: GSM8k (5-shot)
+       type: gsm8k
+       config: main
+       split: test
+       args:
+         num_few_shot: 5
+     metrics:
+     - type: accuracy
+       value:
+       name: accuracy
+     source:
+       url: https://www.placeholderurl.com
+       name: Open LLM Leaderboard
+ 
+ ---
+ This is the adapter (aka LoRA) for the Replete-AI/Replete-Coder-Qwen-1.5b AI model. Feel free to attach it to your own models and see how they perform.
+ 
+ We'd love to hear about the models you create with this adapter in the community posts!
+ 
+ Link to the original model below:
+ 
+ - https://huggingface.co/Replete-AI/Replete-Coder-Qwen-1.5b
+ 
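+ If you'd like to try it quickly, below is a minimal loading sketch using the `transformers` and `peft` libraries. It assumes the base model from this card's metadata (Qwen/Qwen2-1.5B) and this adapter repository; swap in your own model id to experiment with other bases.
+ 
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+ 
+ # Load the base model and tokenizer (base_model per this card's metadata).
+ base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-1.5B")
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B")
+ 
+ # Attach the LoRA adapter weights from this repository.
+ model = PeftModel.from_pretrained(base, "Replete-AI/Adapter_For_Replete-Coder-Qwen2-1.5b")
+ 
+ # Optionally fold the adapter into the base weights for plain-transformers inference.
+ model = model.merge_and_unload()
+ ```
+ 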
+ ___________________________________________________________________
+ # Original Model card
+ 
+ 
+ # Replete-Coder-Qwen2-1.5b
+ Finetuned by: Rombodawg
+ ### More than just a coding model!
+ Although Replete-Coder has amazing coding capabilities, it is trained on a vast amount of non-coding data, fully cleaned and uncensored. Don't just use it for coding; use it for all your needs! We are truly trying to make the GPT killer!
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/-0dERC793D9XeFsJ9uHbx.png)
+ 
+ Thank you to TensorDock for sponsoring Replete-Coder-llama3-8b and Replete-Coder-Qwen2-1.5b.
+ You can check out their website for cloud compute rental below.
+ - https://tensordock.com
+ __________________________________________________________________________________________________
+ Replete-Coder-Qwen2-1.5b is a general-purpose model that is specially trained in coding in over 100 coding languages. The data used to train the model contains 25% non-code instruction data and 75% coding instruction data, totaling 3.9 million lines, roughly 1 billion tokens, or 7.27 GB of instruct data. The data was 100% uncensored and fully deduplicated before training.
+ 
+ The Replete-Coder models (including Replete-Coder-llama3-8b and Replete-Coder-Qwen2-1.5b) feature the following:
+ 
+ - Advanced coding capabilities in over 100 coding languages
+ - Advanced code translation (between languages)
+ - Security- and vulnerability-prevention-related coding capabilities
+ - General-purpose use
+ - Uncensored use
+ - Function calling
+ - Advanced math use
+ - Use on low-end (8b) and mobile (1.5b) platforms
+ 
+ Notice: The Replete-Coder series of models is fine-tuned on a context window of 8192 tokens. Performance beyond this context window is not guaranteed.
+ __________________________________________________________________________________________________
+ 
+ You can find the 25% non-coding instruction data below:
+ 
+ - https://huggingface.co/datasets/Replete-AI/OpenHermes-2.5-Uncensored
+ 
+ And the 75% coding-specific instruction data below:
+ 
+ - https://huggingface.co/datasets/Replete-AI/code_bagel
+ 
+ These two datasets were combined to create the final dataset for training, which is linked below:
+ 
+ - https://huggingface.co/datasets/Replete-AI/code_bagel_hermes-2.5
+ __________________________________________________________________________________________________
+ ## Prompt Template: ChatML
+ ```
+ <|im_start|>system
+ {}<|im_end|>
+ 
+ <|im_start|>user
+ {}<|im_end|>
+ 
+ <|im_start|>assistant
+ {}
+ ```
+ Note: The system prompt varies in the training data, but the most commonly used one is:
+ ```
+ Below is an instruction that describes a task, Write a response that appropriately completes the request.
+ ```
+ End token:
+ ```
+ <|endoftext|>
+ ```
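+ 
+ For illustration, here is one way to fill this template in code before tokenization. This is only a sketch: the system prompt is the common one quoted above, and the user instruction is a made-up example.
+ 
+ ```python
+ # ChatML layout from the template above; the {} slots become system/user text.
+ CHATML = (
+     "<|im_start|>system\n{system}<|im_end|>\n\n"
+     "<|im_start|>user\n{user}<|im_end|>\n\n"
+     "<|im_start|>assistant\n"
+ )
+ 
+ SYSTEM = ("Below is an instruction that describes a task, "
+           "Write a response that appropriately completes the request.")
+ 
+ prompt = CHATML.format(system=SYSTEM, user="Write a Python function that reverses a string.")
+ # Stop generation when the model emits the <|endoftext|> end token noted above.
+ ```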
+ __________________________________________________________________________________________________
+ Thank you to the community for your contributions to the Replete-AI/code_bagel_hermes-2.5 dataset. Without the participation of so many members making their datasets free and open source for anyone to use, this amazing AI model wouldn't be possible.
+ 
+ Extra special thanks to Teknium for the Open-Hermes-2.5 dataset and jondurbin for the bagel dataset and the naming idea for the code_bagel series of datasets. You can find both of their Hugging Face accounts linked below:
+ 
+ - https://huggingface.co/teknium
+ - https://huggingface.co/jondurbin
+ 
+ Another special thanks to Unsloth for being the main training method for Replete-Coder. Below you can find their GitHub, as well as the special Replete-AI secret sauce (Unsloth + QLoRA + GaLore) Colab notebook that was used to train this model.
+ 
+ - https://github.com/unslothai/unsloth
+ - https://colab.research.google.com/drive/1eXGqy5M--0yW4u0uRnmNgBka-tDk2Li0?usp=sharing
+ __________________________________________________________________________________________________
+ 
+ ## Join the Replete-AI Discord! We are a great and loving community!
+ 
+ - https://discord.gg/ZZbnsmVnjD
adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+     "alpha_pattern": {},
+     "auto_mapping": null,
+     "base_model_name_or_path": "rombodawg/Qwen2-1.5b-Reuploaded",
+     "bias": "none",
+     "fan_in_fan_out": false,
+     "inference_mode": true,
+     "init_lora_weights": true,
+     "layer_replication": null,
+     "layers_pattern": null,
+     "layers_to_transform": null,
+     "loftq_config": {},
+     "lora_alpha": 16,
+     "lora_dropout": 0,
+     "megatron_config": null,
+     "megatron_core": "megatron.core",
+     "modules_to_save": null,
+     "peft_type": "LORA",
+     "r": 128,
+     "rank_pattern": {},
+     "revision": "unsloth",
+     "target_modules": [
+         "o_proj",
+         "gate_proj",
+         "q_proj",
+         "v_proj",
+         "down_proj",
+         "up_proj",
+         "k_proj"
+     ],
+     "task_type": "CAUSAL_LM",
+     "use_dora": false,
+     "use_rslora": false
+ }
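For reference, the adapter_config.json above corresponds roughly to the PEFT `LoraConfig` sketched below. This is a readability reconstruction from the JSON, not the actual training script; note the high rank (r=128) relative to lora_alpha (16), and that every attention and MLP projection matrix is targeted.

```python
from peft import LoraConfig

config = LoraConfig(
    r=128,               # LoRA rank: width of the low-rank update matrices
    lora_alpha=16,       # scaling factor; effective scale is lora_alpha / r
    lora_dropout=0.0,    # no dropout on the adapter path
    bias="none",         # bias terms are left un-adapted
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)
```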
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c8b62a20448a4b00307024771092df490655d4fbc52794daff655483bd8dc8b
+ size 590925768
added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+     "<|PAD_TOKEN|>": 151646,
+     "<|endoftext|>": 151643,
+     "<|im_end|>": 151645,
+     "<|im_start|>": 151644
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd80a46195b0e13c3672b7d364fb08094b7e9aaf398d1546aa53bff79dda459f
+ size 297493122
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a6d4fbb4eb2a12b17b73babfa38b90bdafc73a682a0af60498a1e002b1fd5b3a
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3739d7eb999bf36de6559e6cbb22bad92b38628a4dec199689914be8f8eefa40
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,14 @@
+ {
+     "additional_special_tokens": [
+         "<|im_start|>",
+         "<|im_end|>"
+     ],
+     "eos_token": {
+         "content": "<|endoftext|>",
+         "lstrip": false,
+         "normalized": false,
+         "rstrip": false,
+         "single_word": false
+     },
+     "pad_token": "<|PAD_TOKEN|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
+ {
+     "add_prefix_space": false,
+     "added_tokens_decoder": {
+         "151643": {
+             "content": "<|endoftext|>",
+             "lstrip": false,
+             "normalized": false,
+             "rstrip": false,
+             "single_word": false,
+             "special": true
+         },
+         "151644": {
+             "content": "<|im_start|>",
+             "lstrip": false,
+             "normalized": false,
+             "rstrip": false,
+             "single_word": false,
+             "special": true
+         },
+         "151645": {
+             "content": "<|im_end|>",
+             "lstrip": false,
+             "normalized": false,
+             "rstrip": false,
+             "single_word": false,
+             "special": true
+         },
+         "151646": {
+             "content": "<|PAD_TOKEN|>",
+             "lstrip": false,
+             "normalized": false,
+             "rstrip": false,
+             "single_word": false,
+             "special": true
+         }
+     },
+     "additional_special_tokens": [
+         "<|im_start|>",
+         "<|im_end|>"
+     ],
+     "bos_token": null,
+     "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+     "clean_up_tokenization_spaces": false,
+     "eos_token": "<|endoftext|>",
+     "errors": "replace",
+     "model_max_length": 131072,
+     "pad_token": "<|PAD_TOKEN|>",
+     "padding_side": "right",
+     "split_special_tokens": false,
+     "tokenizer_class": "Qwen2Tokenizer",
+     "unk_token": null
+ }
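The chat_template embedded above is standard ChatML: it injects a default "You are a helpful assistant" system message when the conversation does not start with one, and appends the assistant header when a generation prompt is requested. A small usage sketch, assuming the tokenizer is loaded from this repository:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Replete-AI/Adapter_For_Replete-Coder-Qwen2-1.5b")
messages = [{"role": "user", "content": "Explain LoRA in one sentence."}]

# Renders the ChatML string; the template inserts the default system
# message and the trailing assistant header for generation.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```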
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c38be9fd5aeecf75ee18e2def9e642c0a2181459b624fd37b3e5a2ab7af10605
+ size 5368
vocab.json ADDED
The diff for this file is too large to render. See raw diff