Update README.md to include GPTQModel usage.
README.md CHANGED

@@ -113,4 +113,31 @@ tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
```
model = AutoGPTQForCausalLM.from_quantized(pretrained_model_dir, device="cuda:0", model_basename="model")
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(pipeline("auto-gptq is")[0]["generated_text"])
```
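
The pipeline call passes generation keyword arguments through to the model's `generate` method, so output length and sampling can be tuned per call. A minimal sketch; the parameter names come from the standard `transformers` generation API and the values are illustrative, not part of the original README:

```
# Sketch: kwargs are forwarded to model.generate by the transformers pipeline.
# The max_new_tokens / do_sample / temperature values here are illustrative.
print(pipeline("auto-gptq is", max_new_tokens=64, do_sample=True, temperature=0.7)[0]["generated_text"])
```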

### Run the model with GPTQModel

GPTQModel package: https://github.com/ModelCloud/GPTQModel

Install the package, then load the quantized model and generate:

```
pip install -v gptqmodel=="1.8.0" --no-build-isolation
```

```
from gptqmodel import GPTQModel

model_id = 'iproskurina/bloom-3b-GPTQ-4bit-g128'
model = GPTQModel.load(model_id)
result = model.generate("Uncovering deep insights")[0]  # generated token ids
print(model.tokenizer.decode(result))  # string output
```
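
Output length can presumably be controlled the same way as with a plain `transformers` model. A minimal sketch, assuming `GPTQModel.generate` forwards standard Hugging Face generation kwargs such as `max_new_tokens` (an assumption, not something this README states):

```
# Reuses `model` loaded above.
# Assumption: generate() forwards HF generation kwargs to the wrapped model.
tokens = model.generate("Uncovering deep insights", max_new_tokens=64)[0]
print(model.tokenizer.decode(tokens, skip_special_tokens=True))
```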