from typing import Optional

from transformers import PretrainedConfig


class ZettHypernetConfig(PretrainedConfig):
    """Configuration for the ZeTT hypernetwork, which predicts embedding parameters for a target tokenizer."""

    def __init__(
        self,
        hn_model_name_or_path: str = "roberta-base",
        hn_surface_maxlen: int = 16,
        hn_n_layers: int = 3,
        n_embd: int = 768,
        hn_hidden_size: Optional[int] = None,
        hn_intermediate_size: Optional[int] = None,
        hn_rescale_embeddings: bool = False,
        use_unigram_bias: bool = False,
        hn_embed_target_priors: bool = False,
        hn_add_inter_token_attention: bool = False,
        hn_inter_token_attention_bias_by_priors: bool = False,
        hn_inter_token_attention_bias_scaler: float = 1.0,
        hn_n_inter_token_blocks: int = 16,
        hn_language_adapter_bottleneck_dim: int = 0,
        hn_embed_using_source_embeddings: bool = False,
        hn_concat_last_hidden_state: bool = False,
        hn_single_head: bool = False,
        hn_predict_bias: bool = True,
        hn_num_attention_heads: Optional[int] = None,
        hn_embed_lang_id: bool = False,
        hn_model_type: str = "roberta",
        n_langs: Optional[int] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.model_type = "zett_hypernetwork"
        self.hn_model_name_or_path = hn_model_name_or_path
        self.hn_surface_maxlen = hn_surface_maxlen
        self.hn_n_layers = hn_n_layers
        self.n_embd = n_embd
        self.hn_hidden_size = hn_hidden_size
        self.hn_intermediate_size = hn_intermediate_size
        self.hn_rescale_embeddings = hn_rescale_embeddings
        self.use_unigram_bias = use_unigram_bias
        self.hn_embed_target_priors = hn_embed_target_priors
        self.hn_add_inter_token_attention = hn_add_inter_token_attention
        self.hn_inter_token_attention_bias_by_priors = (
            hn_inter_token_attention_bias_by_priors
        )
        self.hn_inter_token_attention_bias_scaler = hn_inter_token_attention_bias_scaler
        self.hn_n_inter_token_blocks = hn_n_inter_token_blocks
        self.hn_language_adapter_bottleneck_dim = hn_language_adapter_bottleneck_dim
        self.hn_embed_using_source_embeddings = hn_embed_using_source_embeddings
        self.hn_concat_last_hidden_state = hn_concat_last_hidden_state
        self.hn_single_head = hn_single_head
        self.hn_predict_bias = hn_predict_bias
        self.hn_num_attention_heads = hn_num_attention_heads
        self.hn_embed_lang_id = hn_embed_lang_id
        self.hn_model_type = hn_model_type
        self.n_langs = n_langs
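

# Minimal usage sketch (illustrative addition, not part of the original module):
# since ZettHypernetConfig subclasses PretrainedConfig, it can be constructed,
# saved, and reloaded with the standard transformers config API. The output
# directory name below is an arbitrary example.
if __name__ == "__main__":
    config = ZettHypernetConfig(hn_model_name_or_path="roberta-base", hn_n_layers=3)
    config.save_pretrained("./zett_hypernet_config")  # writes config.json
    reloaded = ZettHypernetConfig.from_pretrained("./zett_hypernet_config")
    print(reloaded.hn_model_name_or_path, reloaded.hn_surface_maxlen)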