| { | |
| "metadata": { | |
| "ParamSize": 195, | |
| "ParamBytes": 15284318208.0, | |
| "BitsPerParam": 32.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 197001216, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 32064, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 197001216, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7a1a95f2ae2cbb87ec39da0237285fcf" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8498183451d684d93243ea4bd8b7387" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ca06f33d2c7dd69f8d7c5edcf48ea89f" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2e62af200caafb75a275e4e8f13481d" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52e4e7932dc9c58ddd69ebcc3785758f" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6146432fb7f4de5e039fa715d4f6f88" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bfd622c23a859a8caa3373eae5a92a0c" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2dee4b39f0d8c44af06cdb70c411972a" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c86231026959e1ee9c39f77580b4d36c" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a4df073b3abf4f9b3ba309e5f051b686" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "863376eb8fb83fbd1d4b9cdd220570db" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a07c89e333245a1b9fc83a1dbac4784e" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7f721c2dee4313cd389a8744a54a57a" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6ac9114c2f79ee8ecb6acabff14d47b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f5b3ebaecbac5418fbfef5191cec4a66" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eec3c25ff27a28f26619e020dcf36a33" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c344567a132f77166b2b92f005023589" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3592ab87bcac6bd322ffb1f98d865321" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a2baa0024a66afc53a17d8854250ce52" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "332df77ea4c98780ef03087b02d4f0b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "66e117a0b3c91314bb916d8092c08c4b" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "254213171e87192b116a6001e21e5380" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0323d05e0b570882ee5fb5bf65d0ed3a" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e26308d35674f54e27992f0a5faa73f9" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0af17cf2bf5a145594719210b6a8f83" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dd34b0bcb25c732b01509c25b03d88b9" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b39a5862892a6574797ada5026df12e" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6f2a529246d94bcdd656c011a6a4b108" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d31b46cdf0f7a82d3ff4421e61102c00" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ab4427e01b6a1244e591f434214e4f98" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba1b08e065c50c717cfccb720977a89a" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2731c882238d1dbf87271d878fb75bc9" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf2f818616c8c0b49c0696ceeeb3d31e" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "47e1bdd3ab9fc119fea1194a9c11812f" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "da47e4e2561ecd1c4c960d1e23497f50" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dbdcd87e416aab9437d66c8707258ac3" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "db96b2e29b155a05b8cd798cd9d2fc91" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4753eb90e52db9cc0fbd142b51bbbd5f" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ff5d6e5a45ca240daac649c19f39162b" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "433382b1132869273927e886440ccb58" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eebe1e0f17dc034bba16b08d3fd43c88" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b56059bb4d31809170802db34af7d972" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ff0fde7540794379745a93b55edacd44" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 197001216, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.weight", | |
| "shape": [ | |
| 32064, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 197001216, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "be7196f03511c36c99e523b850ca8a32" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0d30aebb3a59ff8fcca24f97c925c6c9" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "174b795d4e73a7881c9d95006813b8c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe1d46c6d3ad99a09763819d815254f8" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c291e6ad97a818a4fa74611055f636a4" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7d0580ce4b3df12a343c8a695392fadf" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cfc680cf95db583a2fe7b8c6c3e66a14" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d06d98739ef2de1bdc053137d37dcbc6" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c151dc82187b062c394d951c3e9aeffe" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "59f85c1d97b55ac900acafc3ab749f73" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d376a9bcc985c64d8ffe6d34dc38ad0d" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "15f21606350539e964a92916db310283" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c7d657c6a4e1b0b87adc6300ed1671a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a0b17bf7e746555df137af85a748f8bd" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "587d44e0fb712d76ca41b5f2b4f87f55" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a571afd9be656d5325d9e7ffb9185cf1" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "477127ea1dd1f1a7d0fb37a73982ba22" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7ee9835d085e68f538d9b9e18e1f9039" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "348e35692621eb726093d13d00641284" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "462c15d820b774613bd50aa5dc86e6ca" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fd85d0d9bcc99c751ac7401c68ba84c5" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0670bb93ccbc7531c7888558b08f2a7e" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6352c9659868003407bda6ce28ac116" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2c7ba522f9c048464cf9d37182ca6de" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9091aeb918ca10f12955d834106e8f74" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9fec27c050650d72e8ea376614e83750" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f42743a8968949651d6750af351d3d00" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "622bd17397cf197996833a8e089de782" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8fdb302d56f62f1b364341e827ac2502" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e3bb70410e9e76cac4fd556a1d5c1843" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "334a2709aff969419610de50edad2a2e" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "414254a4707d029c059bdab4a53a7e74" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "726731b6f371b1d33fa7ae48c2e0edf2" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b612962255aa2565f7b52d4674a0769e" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d2e3a459fb85d5f3feb0d04fa69a7b2d" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9f0ef7bd8d4f8cc25c904d0551f37381" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5b921cf45c0b543dfbbcfd9990493ef7" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ea8f58953612625d91efde95e7cfee17" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9464ec7fce415770174d6104a0ae0c74" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "faac273328ad10a5f93f143a575f9fbb" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "372e9032127db76b13dd03cdd234e4c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ad53516c6fcfedba524cb649c77c4da" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c669f5144b3c7df758e26d48b293532b" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "755466d7f0d76d0f600bbd127d2c7770" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f3a29d2a65d2c380c681e131ff2790b5" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b6b13e37be29c6803048778228d6867b" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8fa55ebbe959c1f6d2b112436939e880" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8cc00a6ace7652beda8160f915406086" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "10287f3839f3dc24001ef242425c4177" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "54ab51d906df2fd76417ff70c3ab6e98" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7980eed55dee7401dcabba9daf8423c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "36f1a0e75e6a69ca0b70a50b54fee6d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "31983ac834677d56859f18dd04e6b7ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a2b32195b9f424b17102c2b5fd022704" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c52b0587c2eb9ae32937ed1831786b36" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9b4bf902c2bb945391279a4f49b5d9e1" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "681c3b06b63667fcc4e272ea2bb9dea4" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bd774ad3b25502c07b36db6d8402f07f" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b531f788792f8f88f9e66b95e13f486f" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b9517f7339b853dbf5ef13a7bf68995a" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b56076774afa3a74894ef178e7fc7141" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "745443853620366613c90c545d18944e" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cd8a0b8f6d06b755634a60c2efe4eef5" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f5e8a71d3586a87448fb6bc2349de004" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8cf15bc0de37b910f20a70148873ab50" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "59a946c11a9e176672209c79576956d4" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ad0625712234da65f789e2f45b865948" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "64a189148c89b4b27e2d01993420f5bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6fad09eaf0494ca36a622ef7e947b59" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6246da06f2eef63deeca7fb93146eded" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ad59662a8299d5ecb431d43360e11355" | |
| }, | |
| { | |
| "dataPath": "params_shard_114.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d28692b7018350c48053b1f1a6ee1a1d" | |
| }, | |
| { | |
| "dataPath": "params_shard_115.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c72588e0084b005ce45ed7a0e962dfd6" | |
| }, | |
| { | |
| "dataPath": "params_shard_116.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a11ae6276fd826348428d0af230943e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_117.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5c3e20920e2a9948e71c5409827599ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_118.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0f85e57b33ff92e8e08d7c7e84c580b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_119.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cc667273837743550bc491811b9dfe9a" | |
| }, | |
| { | |
| "dataPath": "params_shard_120.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "47b5876d1873645481fe759c0b20dfed" | |
| }, | |
| { | |
| "dataPath": "params_shard_121.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e8bb9cc23214da86653d2f00db16cb7" | |
| }, | |
| { | |
| "dataPath": "params_shard_122.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3065282668ba766fd02bade48dfa5b49" | |
| }, | |
| { | |
| "dataPath": "params_shard_123.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "168b7e63fe8a581b5cdf71c4045c9929" | |
| }, | |
| { | |
| "dataPath": "params_shard_124.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9e7f94e87ad0017518ceda44d56be21e" | |
| }, | |
| { | |
| "dataPath": "params_shard_125.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "336a428795678723f63becba86fdc266" | |
| }, | |
| { | |
| "dataPath": "params_shard_126.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "160002bcdcec523a19519ab095bc7826" | |
| }, | |
| { | |
| "dataPath": "params_shard_127.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce7494328332ff2883c686ed0f0bb5d0" | |
| }, | |
| { | |
| "dataPath": "params_shard_128.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c0a99b3a3148680d8000ce68f79bf50c" | |
| }, | |
| { | |
| "dataPath": "params_shard_129.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19273728, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 6144 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 12288 | |
| }, | |
| { | |
| "name": "transformer.h.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18432 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 18874368, | |
| "byteOffset": 24576 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18905088 | |
| }, | |
| { | |
| "name": "transformer.h.24.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18911232 | |
| }, | |
| { | |
| "name": "transformer.h.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18917376 | |
| }, | |
| { | |
| "name": "transformer.h.25.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18923520 | |
| }, | |
| { | |
| "name": "transformer.h.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18929664 | |
| }, | |
| { | |
| "name": "transformer.h.26.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18935808 | |
| }, | |
| { | |
| "name": "transformer.h.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18941952 | |
| }, | |
| { | |
| "name": "transformer.h.27.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18948096 | |
| }, | |
| { | |
| "name": "transformer.h.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18954240 | |
| }, | |
| { | |
| "name": "transformer.h.28.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18960384 | |
| }, | |
| { | |
| "name": "transformer.h.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18966528 | |
| }, | |
| { | |
| "name": "transformer.h.29.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18972672 | |
| }, | |
| { | |
| "name": "transformer.h.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18978816 | |
| }, | |
| { | |
| "name": "transformer.h.30.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18984960 | |
| }, | |
| { | |
| "name": "transformer.h.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18991104 | |
| }, | |
| { | |
| "name": "transformer.h.31.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 18997248 | |
| }, | |
| { | |
| "name": "transformer.h.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19003392 | |
| }, | |
| { | |
| "name": "transformer.norm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19009536 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19015680 | |
| }, | |
| { | |
| "name": "transformer.h.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19027968 | |
| }, | |
| { | |
| "name": "transformer.h.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19040256 | |
| }, | |
| { | |
| "name": "transformer.h.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19046400 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19052544 | |
| }, | |
| { | |
| "name": "transformer.h.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19058688 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19064832 | |
| }, | |
| { | |
| "name": "transformer.h.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19070976 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19077120 | |
| }, | |
| { | |
| "name": "transformer.h.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19083264 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19089408 | |
| }, | |
| { | |
| "name": "transformer.h.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19095552 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19101696 | |
| }, | |
| { | |
| "name": "transformer.h.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19107840 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19113984 | |
| }, | |
| { | |
| "name": "transformer.h.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19120128 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19126272 | |
| }, | |
| { | |
| "name": "transformer.h.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19132416 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19138560 | |
| }, | |
| { | |
| "name": "transformer.h.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19144704 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19150848 | |
| }, | |
| { | |
| "name": "transformer.h.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19156992 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19163136 | |
| }, | |
| { | |
| "name": "transformer.h.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19169280 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19175424 | |
| }, | |
| { | |
| "name": "transformer.h.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19181568 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19187712 | |
| }, | |
| { | |
| "name": "transformer.h.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19193856 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19200000 | |
| }, | |
| { | |
| "name": "transformer.h.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19206144 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19212288 | |
| }, | |
| { | |
| "name": "transformer.h.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19218432 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19224576 | |
| }, | |
| { | |
| "name": "transformer.h.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19230720 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19236864 | |
| }, | |
| { | |
| "name": "transformer.h.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19243008 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19249152 | |
| }, | |
| { | |
| "name": "transformer.h.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19255296 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19261440 | |
| }, | |
| { | |
| "name": "transformer.h.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 6144, | |
| "byteOffset": 19267584 | |
| } | |
| ], | |
| "md5sum": "bb56fd33909f1793f97b53dfc2bed1d4" | |
| } | |
| ] | |
| } |