Recurrent-Llama-3.2-train-recurrence-8 / model.safetensors.index.json
smcleish's picture
Upload RavenForCausalLM
fd0abc3 verified
{
"metadata": {
"total_parameters": 1385228288,
"total_size": 5540913152
},
"weight_map": {
"lm_head.weight": "model-00002-of-00002.safetensors",
"transformer.adapter.weight": "model-00001-of-00002.safetensors",
"transformer.coda.0.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.coda.0.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.0.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.coda.0.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.0.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.coda.0.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.coda.1.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.coda.1.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.1.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.coda.1.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.1.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.coda.1.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.coda.2.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.coda.2.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.2.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.coda.2.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.2.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.coda.2.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.coda.3.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.coda.3.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.3.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.coda.3.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.coda.3.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.coda.3.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.0.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.0.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.0.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.0.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.0.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.0.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.1.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.1.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.1.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.1.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.1.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.1.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.2.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.2.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.2.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.2.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.2.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.2.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.3.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.3.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.3.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.3.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.3.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.3.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.4.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.4.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.4.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.4.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.4.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.4.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.5.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.5.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.5.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.5.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.5.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.core_block.5.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.ln_f.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.0.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.0.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.0.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.0.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.0.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.0.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.1.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.1.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.1.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.1.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.1.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.1.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.2.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.2.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.2.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.2.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.2.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.2.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.3.attn.Wqkv.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.3.attn.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.3.mlp.fc.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.3.mlp.proj.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.3.norm_1.weight": "model-00001-of-00002.safetensors",
"transformer.prelude.3.norm_2.weight": "model-00001-of-00002.safetensors",
"transformer.wte.weight": "model-00001-of-00002.safetensors"
}
}