from transformers import PretrainedConfig


class GptAndPrejudiceConfig(PretrainedConfig):
    """Hyperparameter container for the ``gpt_and_prejudice`` model type.

    The constructor copies each keyword argument onto the instance under the
    same name, forwards any remaining keyword arguments to
    ``PretrainedConfig.__init__``, and finally pins ``is_decoder`` to ``True``
    and ``use_cache`` to ``False``.

    NOTE(review): the parameter names suggest a GPT-style decoder
    (embedding width, layer/head counts, dropout rates, ...), but the model
    code that consumes these values is not part of this file -- confirm the
    exact meaning of each field against the model implementation.
    """

    # Identifier string attached to this configuration class.
    model_type = "gpt_and_prejudice"

    def __init__(
        self,
        vocab_size: int = 50257,
        emb_dim: int = 896,
        n_layers: int = 8,
        n_heads: int = 14,
        context_length: int = 256,
        drop_rate: float = 0.2,
        qkv_bias: bool = True,
        attn_dropout: float = 0.0,
        resid_dropout: float = 0.0,
        mlp_hidden_mult: int = 4,
        **kwargs,
    ) -> None:
        """Store the model hyperparameters on the configuration instance.

        Args:
            vocab_size: Stored as ``self.vocab_size`` (default 50257);
                presumably the tokenizer vocabulary size.
            emb_dim: Stored as ``self.emb_dim`` (default 896); presumably the
                embedding / hidden width.
            n_layers: Stored as ``self.n_layers`` (default 8); presumably the
                number of transformer blocks.
            n_heads: Stored as ``self.n_heads`` (default 14); presumably the
                number of attention heads.
            context_length: Stored as ``self.context_length`` (default 256);
                presumably the maximum sequence length.
            drop_rate: Stored as ``self.drop_rate`` (default 0.2).
            qkv_bias: Stored as ``self.qkv_bias`` (default ``True``).
            attn_dropout: Stored as ``self.attn_dropout`` (default 0.0).
            resid_dropout: Stored as ``self.resid_dropout`` (default 0.0).
            mlp_hidden_mult: Stored as ``self.mlp_hidden_mult`` (default 4);
                presumably the MLP hidden-size multiplier relative to
                ``emb_dim`` -- verify against the model code.
            **kwargs: Passed through unchanged to
                ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)

        # Vocabulary and overall model shape.
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.context_length = context_length

        # Dropout rates and attention projection bias.
        self.drop_rate = drop_rate
        self.qkv_bias = qkv_bias
        self.attn_dropout = attn_dropout
        self.resid_dropout = resid_dropout

        # Feed-forward sizing.
        self.mlp_hidden_mult = mlp_hidden_mult

        # Assigned after the base initializer has run, so these fixed values
        # replace anything supplied for the same names through ``kwargs``.
        self.is_decoder = True
        self.use_cache = False