iproskurina committed on
Commit
96513a7
·
verified ·
1 Parent(s): cc467f3

Update README.md to include GPTQModel usage.

Browse files
Files changed (1) hide show
  1. README.md +28 -1
README.md CHANGED
@@ -113,4 +113,31 @@ tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
113
  model = AutoGPTQForCausalLM.from_quantized(pretrained_model_dir, device="cuda:0", model_basename="model")
114
  pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
115
  print(pipeline("auto-gptq is")[0]["generated_text"])
116
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  model = AutoGPTQForCausalLM.from_quantized(pretrained_model_dir, device="cuda:0", model_basename="model")
114
  pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
115
  print(pipeline("auto-gptq is")[0]["generated_text"])
116
+ ```
117
+ ### Run the model with GPTQModel
118
+
119
+ GPTQModel package: https://github.com/ModelCloud/GPTQModel
120
+
121
+ ```
122
+ # Install first: pip install -v gptqmodel=="1.8.0" --no-build-isolation
123
+ from gptqmodel import GPTQModel
124
+
125
+ model_id = 'iproskurina/bloom-3b-GPTQ-4bit-g128'
126
+ model = GPTQModel.load(model_id)
127
+ result = model.generate("Uncovering deep insights")[0] # tokens
128
+ print(model.tokenizer.decode(result)) # string output
129
+ ```