add formatting example
#26
by
sam-mosaic
- opened
README.md
CHANGED
|
@@ -98,6 +98,31 @@ from transformers import AutoTokenizer
|
|
| 98 |
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
| 99 |
```
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
## Model Description
|
| 102 |
|
| 103 |
The architecture is a modification of a standard decoder-only transformer.
|
|
|
|
| 98 |
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
| 99 |
```
|
| 100 |
|
| 101 |
+
### Formatting
|
| 102 |
+
|
| 103 |
+
This model was trained on data formatted in the dolly-15k format, so prompts should follow the same template at inference time:
|
| 104 |
+
|
| 105 |
+
```python
|
| 106 |
+
INSTRUCTION_KEY = "### Instruction:"
|
| 107 |
+
RESPONSE_KEY = "### Response:"
|
| 108 |
+
INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
|
| 109 |
+
PROMPT_FOR_GENERATION_FORMAT = """{intro}
|
| 110 |
+
{instruction_key}
|
| 111 |
+
{instruction}
|
| 112 |
+
{response_key}
|
| 113 |
+
""".format(
|
| 114 |
+
intro=INTRO_BLURB,
|
| 115 |
+
instruction_key=INSTRUCTION_KEY,
|
| 116 |
+
instruction="{instruction}",
|
| 117 |
+
response_key=RESPONSE_KEY,
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
example = "James decides to run 3 sprints 3 times a week. He runs 60 meters each sprint. How many total meters does he run a week? Explain before answering."
|
| 121 |
+
fmt_ex = PROMPT_FOR_GENERATION_FORMAT.format(instruction=example)
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
In the above example, `fmt_ex` is the fully formatted prompt, ready to be tokenized and passed to the model.
|
| 125 |
+
|
| 126 |
## Model Description
|
| 127 |
|
| 128 |
The architecture is a modification of a standard decoder-only transformer.
|