sky-2002's picture
Upload config.json
9af308d verified
raw
history blame contribute delete
337 Bytes
{
"num_attention_heads": 8,
"input_dim": 512,
"embed_dim": 512,
"q_latent_dim": 128,
"kv_latent_dim": 128,
"max_token_len": 512,
"num_shared_experts": 2,
"num_routed_experts": 4,
"moe_top_k": 2,
"expert_intermediate_dim": 1536,
"num_dense_ffn": 1,
"num_moe_ffn": 2,
"vocab_size": 50257,
"max_batch_size": 24
}