ogbert-v1-mlm / tokenizer_config.json
mjbommar's picture
Fix tokenizer to not produce token_type_ids (ModernBERT compatibility)
0a39df9 verified
{
"additional_special_tokens": null,
"backend": "tokenizers",
"model_input_names": ["input_ids", "attention_mask"],
"bos_token": "<|start|>",
"cls_token": "<|cls|>",
"eos_token": "<|end|>",
"is_local": false,
"mask_token": "<|mask|>",
"model_max_length": 1024,
"pad_token": "<|pad|>",
"padding_side": "right",
"sep_token": "<|sep|>",
"special_tokens_map": {
"bos_token": "<|start|>",
"cls_token": "<|cls|>",
"eos_token": "<|end|>",
"mask_token": "<|mask|>",
"pad_token": "<|pad|>",
"sep_token": "<|sep|>",
"unk_token": "<|unk|>"
},
"tokenizer_class": "PreTrainedTokenizerFast",
"truncation": "longest_first",
"unk_token": "<|unk|>"
}