{ "metadata": { "artifact_id": "d86d900b-d7f7-4838-9727-35ca1b0d4ec4", "name": "mlperf-bert", "timestamp": 1745456449, "furiosa_llm_version": "249c6f1", "furiosa_compiler_version": "b504d5d48" }, "model": { "generator_config": { "position_id_pad": 1, "buckets": [ { "batch_size": 1, "attention_size": 384, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 320, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 192, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 128, "kv_cache_size": 0 }, { "batch_size": 1, "attention_size": 160, "kv_cache_size": 0 }, { "batch_size": 2, "attention_size": 96, "kv_cache_size": 0 } ], "model_qname": "furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering", "paged_attention_config": null, "packing_type": "IDENTITY", "kv_cache_sharing_across_beams_config": null, "num_speculative_tokens": null, "unpadded_vocab_size": null }, "hf_config": { "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": "float32", "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": [ "BertForQuestionAnswering" ], "finetuning_task": null, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "tokenizer_class": null, "prefix": null, "bos_token_id": null, "pad_token_id": 0, "eos_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "furiosa-ai/mlperf-bert-large", "_attn_implementation_autoset": false, "transformers_version": "4.48.1", "model_type": "bert", "vocab_size": 30522, "hidden_size": 1024, "num_hidden_layers": 24, "num_attention_heads": 16, "hidden_act": "rngd_gelu", "intermediate_size": 4096, "hidden_dropout_prob": 0.1, "attention_probs_dropout_prob": 0.1, "max_position_embeddings": 512, "type_vocab_size": 2, "initializer_range": 0.02, "layer_norm_eps": 1e-12, "position_embedding_type": "absolute", "use_cache": true, "classifier_dropout": null }, "model_metadata": { "pretrained_id": "furiosa-ai/mlperf-bert-large", "task_type": "question-answering", "llm_config": { "optimization_config": { "attention_type": "VANILLA", "optimize_rope": false, "optimize_packed": false, "decompose_layernorm": false, "optimize_furiosa": false, "use_unsplit_packed": true, "compact_causal_mask": false, "use_rngd_gelu": true, "causal_mask_free_decoding": false, "kv_cache_sharing_across_beams": false, "inbound_beamsearch_softmax": false, "calculate_logit_only_for_last_token": false, "optimized_for_speculative_decoding": false }, "quantization_config": { "weight": "int8", "activation": "int8", "kv_cache": "int8", "use_mcp": true } }, "hf_configs": {}, "model_weight_path": null, "trust_remote_code": null, "allow_bfloat16_cast_with_mcp": true, "auto_bfloat16_cast": null }, "model_rewriting_config": { "do_decompositions_for_model_rewrite": false, "use_blockwise_compile": true, "embedding_layer_as_single_block": false, "num_blocks_per_supertask": 24, "embed_all_constants_into_graph": true, "optimize_logit_shape": true }, "parallel_config": { "tensor_parallel_size": 1, "pipeline_parallel_size": 1 }, "pipelines": [ { "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn384", "devices": { "0": "npu:0:0" }, "tensors": { "d0_arg0_1": { "shape": [ 1, 384 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 384 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 384, 384 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 384 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 384, 2 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c0" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg2_1", "d0_arg0_1", "d0_arg1_1", "d0_arg3_1" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "92713480ca8937ba5a8dadead5278d92" } }, "metadata": { "tensors": { "inputs": { "input_ids": { "shape": [ 1, 384 ], "dtype": "i32", "idx": 0 }, "token_type_ids": { "shape": [ 1, 384 ], "dtype": "i32", "idx": 1 }, "attention_mask": { "shape": [ 1, 384, 384 ], "dtype": "bool", "idx": 2 }, "position_ids": { "shape": [ 1, 384 ], "dtype": "i32", "idx": 3 } }, "outputs": { "logits": { "shape": [ 1, 384, 2 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 384 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 384 ] ], "origin": "token_type_ids", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 384 ], [ 0, 384 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 384 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c0": { "placements": [ [ 0, 1 ], [ 0, 384 ], [ 0, 2 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "92713480ca8937ba5a8dadead5278d92": null }, "param_files": { "0": { "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn320", "devices": { "0": "npu:0:0" }, "tensors": { "d0_arg0_1": { "shape": [ 1, 320 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 320 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 320, 320 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 320 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 320, 2 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c0" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg2_1", "d0_arg0_1", "d0_arg1_1", "d0_arg3_1" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "0ff335c7ce60753ee28a910e9fab16f4" } }, "metadata": { "tensors": { "inputs": { "input_ids": { "shape": [ 1, 320 ], "dtype": "i32", "idx": 0 }, "token_type_ids": { "shape": [ 1, 320 ], "dtype": "i32", "idx": 1 }, "attention_mask": { "shape": [ 1, 320, 320 ], "dtype": "bool", "idx": 2 }, "position_ids": { "shape": [ 1, 320 ], "dtype": "i32", "idx": 3 } }, "outputs": { "logits": { "shape": [ 1, 320, 2 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 320 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 320 ] ], "origin": "token_type_ids", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 320 ], [ 0, 320 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 320 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c0": { "placements": [ [ 0, 1 ], [ 0, 320 ], [ 0, 2 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "0ff335c7ce60753ee28a910e9fab16f4": null }, "param_files": { "0": { "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn192", "devices": { "0": "npu:0:0" }, "tensors": { "d0_arg0_1": { "shape": [ 1, 192 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 192 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 192, 192 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 192 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 192, 2 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c0" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg2_1", "d0_arg0_1", "d0_arg1_1", "d0_arg3_1" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "eb1a559cd1f53e2ede74f1307030a1d0" } }, "metadata": { "tensors": { "inputs": { "input_ids": { "shape": [ 1, 192 ], "dtype": "i32", "idx": 0 }, "token_type_ids": { "shape": [ 1, 192 ], "dtype": "i32", "idx": 1 }, "attention_mask": { "shape": [ 1, 192, 192 ], "dtype": "bool", "idx": 2 }, "position_ids": { "shape": [ 1, 192 ], "dtype": "i32", "idx": 3 } }, "outputs": { "logits": { "shape": [ 1, 192, 2 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 192 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 192 ] ], "origin": "token_type_ids", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 192 ], [ 0, 192 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 192 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c0": { "placements": [ [ 0, 1 ], [ 0, 192 ], [ 0, 2 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "eb1a559cd1f53e2ede74f1307030a1d0": null }, "param_files": { "0": { "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn128", "devices": { "0": "npu:0:0" }, "tensors": { "d0_arg0_1": { "shape": [ 1, 128 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 128 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 128, 128 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 128 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 128, 2 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c0" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg2_1", "d0_arg0_1", "d0_arg1_1", "d0_arg3_1" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "9ad47915b97d47d3ce069c00271807d6" } }, "metadata": { "tensors": { "inputs": { "input_ids": { "shape": [ 1, 128 ], "dtype": "i32", "idx": 0 }, "token_type_ids": { "shape": [ 1, 128 ], "dtype": "i32", "idx": 1 }, "attention_mask": { "shape": [ 1, 128, 128 ], "dtype": "bool", "idx": 2 }, "position_ids": { "shape": [ 1, 128 ], "dtype": "i32", "idx": 3 } }, "outputs": { "logits": { "shape": [ 1, 128, 2 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 128 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 128 ] ], "origin": "token_type_ids", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 128 ], [ 0, 128 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 128 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c0": { "placements": [ [ 0, 1 ], [ 0, 128 ], [ 0, 2 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "9ad47915b97d47d3ce069c00271807d6": null }, "param_files": { "0": { "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn160", "devices": { "0": "npu:0:0" }, "tensors": { "d0_arg0_1": { "shape": [ 1, 160 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 1, 160 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 1, 160, 160 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 1, 160 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 1, 160, 2 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c0" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg2_1", "d0_arg0_1", "d0_arg1_1", "d0_arg3_1" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "8a7b90c915c1cecaf381c70594e3f955" } }, "metadata": { "tensors": { "inputs": { "input_ids": { "shape": [ 1, 160 ], "dtype": "i32", "idx": 0 }, "token_type_ids": { "shape": [ 1, 160 ], "dtype": "i32", "idx": 1 }, "attention_mask": { "shape": [ 1, 160, 160 ], "dtype": "bool", "idx": 2 }, "position_ids": { "shape": [ 1, 160 ], "dtype": "i32", "idx": 3 } }, "outputs": { "logits": { "shape": [ 1, 160, 2 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 1 ], [ 0, 160 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 1 ], [ 0, 160 ] ], "origin": "token_type_ids", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 1 ], [ 0, 160 ], [ 0, 160 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 1 ], [ 0, 160 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c0": { "placements": [ [ 0, 1 ], [ 0, 160 ], [ 0, 2 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "8a7b90c915c1cecaf381c70594e3f955": null }, "param_files": { "0": { "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" }, { "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b2-attn96", "devices": { "0": "npu:0:0" }, "tensors": { "d0_arg0_1": { "shape": [ 2, 96 ], "dtype": "i32" }, "d0_arg1_1": { "shape": [ 2, 96 ], "dtype": "i32" }, "d0_arg2_1": { "shape": [ 2, 96, 96 ], "dtype": "bool" }, "d0_arg3_1": { "shape": [ 2, 96 ], "dtype": "i32" }, "submod_d0_c0": { "shape": [ 2, 96, 2 ], "dtype": "f32" } }, "supertasks": { "0": { "kind": "input", "inputs": [], "outputs": [ "d0_arg0_1", "d0_arg1_1", "d0_arg2_1", "d0_arg3_1" ] }, "1": { "kind": "output", "inputs": [ "submod_d0_c0" ], "outputs": [] }, "2": { "kind": "edf", "inputs": [ "d0_arg2_1", "d0_arg0_1", "d0_arg1_1", "d0_arg3_1" ], "outputs": [ "submod_d0_c0" ], "device": "0", "data": null, "data_blob": "97bb3cab5f2f7f5f4640c04cbf3b6ee0" } }, "metadata": { "tensors": { "inputs": { "input_ids": { "shape": [ 2, 96 ], "dtype": "i32", "idx": 0 }, "token_type_ids": { "shape": [ 2, 96 ], "dtype": "i32", "idx": 1 }, "attention_mask": { "shape": [ 2, 96, 96 ], "dtype": "bool", "idx": 2 }, "position_ids": { "shape": [ 2, 96 ], "dtype": "i32", "idx": 3 } }, "outputs": { "logits": { "shape": [ 2, 96, 2 ], "dtype": "f32", "idx": 0 } } }, "tensor_slices": { "inputs": { "d0_arg0_1": { "placements": [ [ 0, 2 ], [ 0, 96 ] ], "origin": "input_ids", "dtype": "i32", "device": "0" }, "d0_arg1_1": { "placements": [ [ 0, 2 ], [ 0, 96 ] ], "origin": "token_type_ids", "dtype": "i32", "device": "0" }, "d0_arg2_1": { "placements": [ [ 0, 2 ], [ 0, 96 ], [ 0, 96 ] ], "origin": "attention_mask", "dtype": "bool", "device": "0" }, "d0_arg3_1": { "placements": [ [ 0, 2 ], [ 0, 96 ] ], "origin": "position_ids", "dtype": "i32", "device": "0" } }, "outputs": { "submod_d0_c0": { "placements": [ [ 0, 2 ], [ 0, 96 ], [ 0, 2 ] ], "origin": "logits", "dtype": "f32", "device": "0" } } } }, "blobs": { "97bb3cab5f2f7f5f4640c04cbf3b6ee0": null }, "param_files": { "0": { "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", "format": "safetensors" } }, "device_constraints": [], "version": "0.1.0" } ], "pipeline_metadata_list": [ { "output_logits_size": null }, { "output_logits_size": null }, { "output_logits_size": null }, { "output_logits_size": null }, { "output_logits_size": null }, { "output_logits_size": null } ], "max_prompt_len": null }, "speculative_model": null, "version": { "major": 2, "minor": 0 }, "prefill_chunk_size": null }