| { | |
| "_name_or_path": "seed_encoder_3_decoder_layers", | |
| "activation_dropout": 0.0, | |
| "activation_fn": "gelu", | |
| "adaptive_input": false, | |
| "adaptive_softmax_cutoff": null, | |
| "adaptive_softmax_dropout": 0, | |
| "architectures": [ | |
| "SEEDEncoderDot_NLL_LN" | |
| ], | |
| "attention_dropout": 0.1, | |
| "cross_self_attention": false, | |
| "decoder_atten_window": 8, | |
| "decoder_attention_heads": 12, | |
| "decoder_embed_dim": 768, | |
| "decoder_embed_path": null, | |
| "decoder_ffn_embed_dim": 3072, | |
| "decoder_input_dim": 768, | |
| "decoder_layerdrop": 0, | |
| "decoder_layers": 3, | |
| "decoder_layers_to_keep": null, | |
| "decoder_learned_pos": true, | |
| "decoder_normalize_before": true, | |
| "decoder_output_dim": 768, | |
| "dropout": 0.1, | |
| "encoder_attention_heads": 12, | |
| "encoder_embed_dim": 768, | |
| "encoder_ffn_embed_dim": 3072, | |
| "encoder_layerdrop": 0.0, | |
| "encoder_layers": 12, | |
| "encoder_layers_to_keep": null, | |
| "finetuning_task": "msmarco", | |
| "layernorm_embedding": true, | |
| "max_positions": 512, | |
| "max_source_positions": 512, | |
| "max_target_positions": 512, | |
| "model_type": "seed_encoder", | |
| "no_cross_attention": false, | |
| "no_scale_embedding": true, | |
| "no_token_positional_embeddings": false, | |
| "pad_token_id": 1, | |
| "pooler_activation_fn": "tanh", | |
| "pooler_dropout": 0.0, | |
| "quant_noise_pq": 0.0, | |
| "quant_noise_pq_block_size": 8, | |
| "share_all_embeddings": true, | |
| "share_decoder_input_output_embed": true, | |
| "tie_adaptive_weights": true, | |
| "train_ratio": "0.5:0.5", | |
| "vocab_size": 32769 | |
| } | |