adding flax to pt conversion script
converter.py +31 -0
converter.py
ADDED
@@ -0,0 +1,31 @@
+# from transformers import AutoTokenizer, RobertaModel
+
+# model = RobertaModel.from_pretrained('sinhala-roberta-mc4', from_flax=True)
+# tokenizer = AutoTokenizer.from_pretrained('sinhala-roberta-mc4')
+
+# tokenizer.save_pretrained('sinhala-roberta-mc4')
+# model.save_pretrained('sinhala-roberta-mc4')
+
+from transformers import RobertaForMaskedLM, FlaxRobertaForMaskedLM, AutoTokenizer
+import torch
+import numpy as np
+import jax
+import jax.numpy as jnp
+jax.config.update('jax_platform_name', 'cpu')
+MODEL_PATH = "sinhala-roberta-oscar"
+model = FlaxRobertaForMaskedLM.from_pretrained(MODEL_PATH)
+def to_f32(t):
+    return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t)
+model.params = to_f32(model.params)
+model.save_pretrained(MODEL_PATH)
+pt_model = RobertaForMaskedLM.from_pretrained(MODEL_PATH, from_flax=True).to('cpu')
+input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32)
+input_ids_pt = torch.tensor(input_ids)
+logits_pt = pt_model(input_ids_pt).logits
+print(logits_pt)
+logits_fx = model(input_ids).logits
+print(logits_fx)
+pt_model.save_pretrained(MODEL_PATH)
+# also save tokenizer
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+tokenizer.save_pretrained(MODEL_PATH)
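As a rough usage sketch (not part of this commit): once converter.py has run, the Flax and PyTorch weights saved under the same path can be cross-checked numerically with np.allclose instead of eyeballing the printed logits. The tolerance value and the reload-from-disk step below are assumptions, not something the commit does.

# Sketch only: numeric cross-check of the converted checkpoint.
# Assumes converter.py has already written both Flax and PyTorch weights to MODEL_PATH.
import numpy as np
import torch
from transformers import RobertaForMaskedLM, FlaxRobertaForMaskedLM

MODEL_PATH = "sinhala-roberta-oscar"

pt_model = RobertaForMaskedLM.from_pretrained(MODEL_PATH)
fx_model = FlaxRobertaForMaskedLM.from_pretrained(MODEL_PATH)

# same dummy batch shape as the script: 2 sequences of 128 zero token ids
input_ids = np.zeros((2, 128), dtype=np.int32)

with torch.no_grad():
    logits_pt = pt_model(torch.tensor(input_ids).long()).logits.numpy()
logits_fx = np.asarray(fx_model(input_ids).logits)

# the two frameworks should agree up to small floating-point error
print(np.allclose(logits_pt, logits_fx, atol=1e-3))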