{
  "metadata": {
    "artifact_id": "d86d900b-d7f7-4838-9727-35ca1b0d4ec4",
    "name": "mlperf-bert",
    "timestamp": 1745456449,
    "furiosa_llm_version": "249c6f1",
    "furiosa_compiler_version": "b504d5d48"
  },
  "model": {
    "generator_config": {
      "position_id_pad": 1,
      "buckets": [
        {
          "batch_size": 1,
          "attention_size": 384,
          "kv_cache_size": 0
        },
        {
          "batch_size": 1,
          "attention_size": 320,
          "kv_cache_size": 0
        },
        {
          "batch_size": 1,
          "attention_size": 192,
          "kv_cache_size": 0
        },
        {
          "batch_size": 1,
          "attention_size": 128,
          "kv_cache_size": 0
        },
        {
          "batch_size": 1,
          "attention_size": 160,
          "kv_cache_size": 0
        },
        {
          "batch_size": 2,
          "attention_size": 96,
          "kv_cache_size": 0
        }
      ],
      "model_qname": "furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering",
      "paged_attention_config": null,
      "packing_type": "IDENTITY",
      "kv_cache_sharing_across_beams_config": null,
      "num_speculative_tokens": null,
      "unpadded_vocab_size": null
    },
    "hf_config": {
      "return_dict": true,
      "output_hidden_states": false,
      "output_attentions": false,
      "torchscript": false,
      "torch_dtype": "float32",
      "use_bfloat16": false,
      "tf_legacy_loss": false,
      "pruned_heads": {},
      "tie_word_embeddings": true,
      "chunk_size_feed_forward": 0,
      "is_encoder_decoder": false,
      "is_decoder": false,
      "cross_attention_hidden_size": null,
      "add_cross_attention": false,
      "tie_encoder_decoder": false,
      "max_length": 20,
      "min_length": 0,
      "do_sample": false,
      "early_stopping": false,
      "num_beams": 1,
      "num_beam_groups": 1,
      "diversity_penalty": 0.0,
      "temperature": 1.0,
      "top_k": 50,
      "top_p": 1.0,
      "typical_p": 1.0,
      "repetition_penalty": 1.0,
      "length_penalty": 1.0,
      "no_repeat_ngram_size": 0,
      "encoder_no_repeat_ngram_size": 0,
      "bad_words_ids": null,
      "num_return_sequences": 1,
      "output_scores": false,
      "return_dict_in_generate": false,
      "forced_bos_token_id": null,
      "forced_eos_token_id": null,
      "remove_invalid_values": false,
      "exponential_decay_length_penalty": null,
      "suppress_tokens": null,
      "begin_suppress_tokens": null,
      "architectures": [
        "BertForQuestionAnswering"
      ],
      "finetuning_task": null,
      "id2label": {
        "0": "LABEL_0",
        "1": "LABEL_1"
      },
      "label2id": {
        "LABEL_0": 0,
        "LABEL_1": 1
      },
      "tokenizer_class": null,
      "prefix": null,
      "bos_token_id": null,
      "pad_token_id": 0,
      "eos_token_id": null,
      "sep_token_id": null,
      "decoder_start_token_id": null,
      "task_specific_params": null,
      "problem_type": null,
      "_name_or_path": "furiosa-ai/mlperf-bert-large",
      "_attn_implementation_autoset": false,
      "transformers_version": "4.48.1",
      "model_type": "bert",
      "vocab_size": 30522,
      "hidden_size": 1024,
      "num_hidden_layers": 24,
      "num_attention_heads": 16,
      "hidden_act": "rngd_gelu",
      "intermediate_size": 4096,
      "hidden_dropout_prob": 0.1,
      "attention_probs_dropout_prob": 0.1,
      "max_position_embeddings": 512,
      "type_vocab_size": 2,
      "initializer_range": 0.02,
      "layer_norm_eps": 1e-12,
      "position_embedding_type": "absolute",
      "use_cache": true,
      "classifier_dropout": null
    },
    "model_metadata": {
      "pretrained_id": "furiosa-ai/mlperf-bert-large",
      "task_type": "question-answering",
      "llm_config": {
        "optimization_config": {
          "attention_type": "VANILLA",
          "optimize_rope": false,
          "optimize_packed": false,
          "decompose_layernorm": false,
          "optimize_furiosa": false,
          "use_unsplit_packed": true,
          "compact_causal_mask": false,
          "use_rngd_gelu": true,
          "causal_mask_free_decoding": false,
          "kv_cache_sharing_across_beams": false,
          "inbound_beamsearch_softmax": false,
          "calculate_logit_only_for_last_token": false,
          "optimized_for_speculative_decoding": false
        },
        "quantization_config": {
          "weight": "int8",
          "activation": "int8",
          "kv_cache": "int8",
          "use_mcp": true
        }
      },
      "hf_configs": {},
      "model_weight_path": null,
      "trust_remote_code": null,
      "allow_bfloat16_cast_with_mcp": true,
      "auto_bfloat16_cast": null
    },
    "model_rewriting_config": {
      "do_decompositions_for_model_rewrite": false,
      "use_blockwise_compile": true,
      "embedding_layer_as_single_block": false,
      "num_blocks_per_supertask": 24,
      "embed_all_constants_into_graph": true,
      "optimize_logit_shape": true
    },
    "parallel_config": {
      "tensor_parallel_size": 1,
      "pipeline_parallel_size": 1
    },
    "pipelines": [
      {
        "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn384",
        "devices": {
          "0": "npu:0:0"
        },
        "tensors": {
          "d0_arg0_1": {
            "shape": [
              1,
              384
            ],
            "dtype": "i32"
          },
          "d0_arg1_1": {
            "shape": [
              1,
              384
            ],
            "dtype": "i32"
          },
          "d0_arg2_1": {
            "shape": [
              1,
              384,
              384
            ],
            "dtype": "bool"
          },
          "d0_arg3_1": {
            "shape": [
              1,
              384
            ],
            "dtype": "i32"
          },
          "submod_d0_c0": {
            "shape": [
              1,
              384,
              2
            ],
            "dtype": "f32"
          }
        },
        "supertasks": {
          "0": {
            "kind": "input",
            "inputs": [],
            "outputs": [
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg2_1",
              "d0_arg3_1"
            ]
          },
          "1": {
            "kind": "output",
            "inputs": [
              "submod_d0_c0"
            ],
            "outputs": []
          },
          "2": {
            "kind": "edf",
            "inputs": [
              "d0_arg2_1",
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg3_1"
            ],
            "outputs": [
              "submod_d0_c0"
            ],
            "device": "0",
            "data": null,
            "data_blob": "92713480ca8937ba5a8dadead5278d92"
          }
        },
        "metadata": {
          "tensors": {
            "inputs": {
              "input_ids": {
                "shape": [
                  1,
                  384
                ],
                "dtype": "i32",
                "idx": 0
              },
              "token_type_ids": {
                "shape": [
                  1,
                  384
                ],
                "dtype": "i32",
                "idx": 1
              },
              "attention_mask": {
                "shape": [
                  1,
                  384,
                  384
                ],
                "dtype": "bool",
                "idx": 2
              },
              "position_ids": {
                "shape": [
                  1,
                  384
                ],
                "dtype": "i32",
                "idx": 3
              }
            },
            "outputs": {
              "logits": {
                "shape": [
                  1,
                  384,
                  2
                ],
                "dtype": "f32",
                "idx": 0
              }
            }
          },
          "tensor_slices": {
            "inputs": {
              "d0_arg0_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    384
                  ]
                ],
                "origin": "input_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg1_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    384
                  ]
                ],
                "origin": "token_type_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg2_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    384
                  ],
                  [
                    0,
                    384
                  ]
                ],
                "origin": "attention_mask",
                "dtype": "bool",
                "device": "0"
              },
              "d0_arg3_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    384
                  ]
                ],
                "origin": "position_ids",
                "dtype": "i32",
                "device": "0"
              }
            },
            "outputs": {
              "submod_d0_c0": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    384
                  ],
                  [
                    0,
                    2
                  ]
                ],
                "origin": "logits",
                "dtype": "f32",
                "device": "0"
              }
            }
          }
        },
        "blobs": {
          "92713480ca8937ba5a8dadead5278d92": null
        },
        "param_files": {
          "0": {
            "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
            "format": "safetensors"
          }
        },
        "device_constraints": [],
        "version": "0.1.0"
      },
      {
        "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn320",
        "devices": {
          "0": "npu:0:0"
        },
        "tensors": {
          "d0_arg0_1": {
            "shape": [
              1,
              320
            ],
            "dtype": "i32"
          },
          "d0_arg1_1": {
            "shape": [
              1,
              320
            ],
            "dtype": "i32"
          },
          "d0_arg2_1": {
            "shape": [
              1,
              320,
              320
            ],
            "dtype": "bool"
          },
          "d0_arg3_1": {
            "shape": [
              1,
              320
            ],
            "dtype": "i32"
          },
          "submod_d0_c0": {
            "shape": [
              1,
              320,
              2
            ],
            "dtype": "f32"
          }
        },
        "supertasks": {
          "0": {
            "kind": "input",
            "inputs": [],
            "outputs": [
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg2_1",
              "d0_arg3_1"
            ]
          },
          "1": {
            "kind": "output",
            "inputs": [
              "submod_d0_c0"
            ],
            "outputs": []
          },
          "2": {
            "kind": "edf",
            "inputs": [
              "d0_arg2_1",
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg3_1"
            ],
            "outputs": [
              "submod_d0_c0"
            ],
            "device": "0",
            "data": null,
            "data_blob": "0ff335c7ce60753ee28a910e9fab16f4"
          }
        },
        "metadata": {
          "tensors": {
            "inputs": {
              "input_ids": {
                "shape": [
                  1,
                  320
                ],
                "dtype": "i32",
                "idx": 0
              },
              "token_type_ids": {
                "shape": [
                  1,
                  320
                ],
                "dtype": "i32",
                "idx": 1
              },
              "attention_mask": {
                "shape": [
                  1,
                  320,
                  320
                ],
                "dtype": "bool",
                "idx": 2
              },
              "position_ids": {
                "shape": [
                  1,
                  320
                ],
                "dtype": "i32",
                "idx": 3
              }
            },
            "outputs": {
              "logits": {
                "shape": [
                  1,
                  320,
                  2
                ],
                "dtype": "f32",
                "idx": 0
              }
            }
          },
          "tensor_slices": {
            "inputs": {
              "d0_arg0_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    320
                  ]
                ],
                "origin": "input_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg1_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    320
                  ]
                ],
                "origin": "token_type_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg2_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    320
                  ],
                  [
                    0,
                    320
                  ]
                ],
                "origin": "attention_mask",
                "dtype": "bool",
                "device": "0"
              },
              "d0_arg3_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    320
                  ]
                ],
                "origin": "position_ids",
                "dtype": "i32",
                "device": "0"
              }
            },
            "outputs": {
              "submod_d0_c0": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    320
                  ],
                  [
                    0,
                    2
                  ]
                ],
                "origin": "logits",
                "dtype": "f32",
                "device": "0"
              }
            }
          }
        },
        "blobs": {
          "0ff335c7ce60753ee28a910e9fab16f4": null
        },
        "param_files": {
          "0": {
            "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
            "format": "safetensors"
          }
        },
        "device_constraints": [],
        "version": "0.1.0"
      },
      {
        "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn192",
        "devices": {
          "0": "npu:0:0"
        },
        "tensors": {
          "d0_arg0_1": {
            "shape": [
              1,
              192
            ],
            "dtype": "i32"
          },
          "d0_arg1_1": {
            "shape": [
              1,
              192
            ],
            "dtype": "i32"
          },
          "d0_arg2_1": {
            "shape": [
              1,
              192,
              192
            ],
            "dtype": "bool"
          },
          "d0_arg3_1": {
            "shape": [
              1,
              192
            ],
            "dtype": "i32"
          },
          "submod_d0_c0": {
            "shape": [
              1,
              192,
              2
            ],
            "dtype": "f32"
          }
        },
        "supertasks": {
          "0": {
            "kind": "input",
            "inputs": [],
            "outputs": [
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg2_1",
              "d0_arg3_1"
            ]
          },
          "1": {
            "kind": "output",
            "inputs": [
              "submod_d0_c0"
            ],
            "outputs": []
          },
          "2": {
            "kind": "edf",
            "inputs": [
              "d0_arg2_1",
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg3_1"
            ],
            "outputs": [
              "submod_d0_c0"
            ],
            "device": "0",
            "data": null,
            "data_blob": "eb1a559cd1f53e2ede74f1307030a1d0"
          }
        },
        "metadata": {
          "tensors": {
            "inputs": {
              "input_ids": {
                "shape": [
                  1,
                  192
                ],
                "dtype": "i32",
                "idx": 0
              },
              "token_type_ids": {
                "shape": [
                  1,
                  192
                ],
                "dtype": "i32",
                "idx": 1
              },
              "attention_mask": {
                "shape": [
                  1,
                  192,
                  192
                ],
                "dtype": "bool",
                "idx": 2
              },
              "position_ids": {
                "shape": [
                  1,
                  192
                ],
                "dtype": "i32",
                "idx": 3
              }
            },
            "outputs": {
              "logits": {
                "shape": [
                  1,
                  192,
                  2
                ],
                "dtype": "f32",
                "idx": 0
              }
            }
          },
          "tensor_slices": {
            "inputs": {
              "d0_arg0_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    192
                  ]
                ],
                "origin": "input_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg1_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    192
                  ]
                ],
                "origin": "token_type_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg2_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    192
                  ],
                  [
                    0,
                    192
                  ]
                ],
                "origin": "attention_mask",
                "dtype": "bool",
                "device": "0"
              },
              "d0_arg3_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    192
                  ]
                ],
                "origin": "position_ids",
                "dtype": "i32",
                "device": "0"
              }
            },
            "outputs": {
              "submod_d0_c0": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    192
                  ],
                  [
                    0,
                    2
                  ]
                ],
                "origin": "logits",
                "dtype": "f32",
                "device": "0"
              }
            }
          }
        },
        "blobs": {
          "eb1a559cd1f53e2ede74f1307030a1d0": null
        },
        "param_files": {
          "0": {
            "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
            "format": "safetensors"
          }
        },
        "device_constraints": [],
        "version": "0.1.0"
      },
      {
        "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn128",
        "devices": {
          "0": "npu:0:0"
        },
        "tensors": {
          "d0_arg0_1": {
            "shape": [
              1,
              128
            ],
            "dtype": "i32"
          },
          "d0_arg1_1": {
            "shape": [
              1,
              128
            ],
            "dtype": "i32"
          },
          "d0_arg2_1": {
            "shape": [
              1,
              128,
              128
            ],
            "dtype": "bool"
          },
          "d0_arg3_1": {
            "shape": [
              1,
              128
            ],
            "dtype": "i32"
          },
          "submod_d0_c0": {
            "shape": [
              1,
              128,
              2
            ],
            "dtype": "f32"
          }
        },
        "supertasks": {
          "0": {
            "kind": "input",
            "inputs": [],
            "outputs": [
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg2_1",
              "d0_arg3_1"
            ]
          },
          "1": {
            "kind": "output",
            "inputs": [
              "submod_d0_c0"
            ],
            "outputs": []
          },
          "2": {
            "kind": "edf",
            "inputs": [
              "d0_arg2_1",
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg3_1"
            ],
            "outputs": [
              "submod_d0_c0"
            ],
            "device": "0",
            "data": null,
            "data_blob": "9ad47915b97d47d3ce069c00271807d6"
          }
        },
        "metadata": {
          "tensors": {
            "inputs": {
              "input_ids": {
                "shape": [
                  1,
                  128
                ],
                "dtype": "i32",
                "idx": 0
              },
              "token_type_ids": {
                "shape": [
                  1,
                  128
                ],
                "dtype": "i32",
                "idx": 1
              },
              "attention_mask": {
                "shape": [
                  1,
                  128,
                  128
                ],
                "dtype": "bool",
                "idx": 2
              },
              "position_ids": {
                "shape": [
                  1,
                  128
                ],
                "dtype": "i32",
                "idx": 3
              }
            },
            "outputs": {
              "logits": {
                "shape": [
                  1,
                  128,
                  2
                ],
                "dtype": "f32",
                "idx": 0
              }
            }
          },
          "tensor_slices": {
            "inputs": {
              "d0_arg0_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    128
                  ]
                ],
                "origin": "input_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg1_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    128
                  ]
                ],
                "origin": "token_type_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg2_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    128
                  ],
                  [
                    0,
                    128
                  ]
                ],
                "origin": "attention_mask",
                "dtype": "bool",
                "device": "0"
              },
              "d0_arg3_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    128
                  ]
                ],
                "origin": "position_ids",
                "dtype": "i32",
                "device": "0"
              }
            },
            "outputs": {
              "submod_d0_c0": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    128
                  ],
                  [
                    0,
                    2
                  ]
                ],
                "origin": "logits",
                "dtype": "f32",
                "device": "0"
              }
            }
          }
        },
        "blobs": {
          "9ad47915b97d47d3ce069c00271807d6": null
        },
        "param_files": {
          "0": {
            "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
            "format": "safetensors"
          }
        },
        "device_constraints": [],
        "version": "0.1.0"
      },
      {
        "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn160",
        "devices": {
          "0": "npu:0:0"
        },
        "tensors": {
          "d0_arg0_1": {
            "shape": [
              1,
              160
            ],
            "dtype": "i32"
          },
          "d0_arg1_1": {
            "shape": [
              1,
              160
            ],
            "dtype": "i32"
          },
          "d0_arg2_1": {
            "shape": [
              1,
              160,
              160
            ],
            "dtype": "bool"
          },
          "d0_arg3_1": {
            "shape": [
              1,
              160
            ],
            "dtype": "i32"
          },
          "submod_d0_c0": {
            "shape": [
              1,
              160,
              2
            ],
            "dtype": "f32"
          }
        },
        "supertasks": {
          "0": {
            "kind": "input",
            "inputs": [],
            "outputs": [
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg2_1",
              "d0_arg3_1"
            ]
          },
          "1": {
            "kind": "output",
            "inputs": [
              "submod_d0_c0"
            ],
            "outputs": []
          },
          "2": {
            "kind": "edf",
            "inputs": [
              "d0_arg2_1",
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg3_1"
            ],
            "outputs": [
              "submod_d0_c0"
            ],
            "device": "0",
            "data": null,
            "data_blob": "8a7b90c915c1cecaf381c70594e3f955"
          }
        },
        "metadata": {
          "tensors": {
            "inputs": {
              "input_ids": {
                "shape": [
                  1,
                  160
                ],
                "dtype": "i32",
                "idx": 0
              },
              "token_type_ids": {
                "shape": [
                  1,
                  160
                ],
                "dtype": "i32",
                "idx": 1
              },
              "attention_mask": {
                "shape": [
                  1,
                  160,
                  160
                ],
                "dtype": "bool",
                "idx": 2
              },
              "position_ids": {
                "shape": [
                  1,
                  160
                ],
                "dtype": "i32",
                "idx": 3
              }
            },
            "outputs": {
              "logits": {
                "shape": [
                  1,
                  160,
                  2
                ],
                "dtype": "f32",
                "idx": 0
              }
            }
          },
          "tensor_slices": {
            "inputs": {
              "d0_arg0_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    160
                  ]
                ],
                "origin": "input_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg1_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    160
                  ]
                ],
                "origin": "token_type_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg2_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    160
                  ],
                  [
                    0,
                    160
                  ]
                ],
                "origin": "attention_mask",
                "dtype": "bool",
                "device": "0"
              },
              "d0_arg3_1": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    160
                  ]
                ],
                "origin": "position_ids",
                "dtype": "i32",
                "device": "0"
              }
            },
            "outputs": {
              "submod_d0_c0": {
                "placements": [
                  [
                    0,
                    1
                  ],
                  [
                    0,
                    160
                  ],
                  [
                    0,
                    2
                  ]
                ],
                "origin": "logits",
                "dtype": "f32",
                "device": "0"
              }
            }
          }
        },
        "blobs": {
          "8a7b90c915c1cecaf381c70594e3f955": null
        },
        "param_files": {
          "0": {
            "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
            "format": "safetensors"
          }
        },
        "device_constraints": [],
        "version": "0.1.0"
      },
      {
        "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b2-attn96",
        "devices": {
          "0": "npu:0:0"
        },
        "tensors": {
          "d0_arg0_1": {
            "shape": [
              2,
              96
            ],
            "dtype": "i32"
          },
          "d0_arg1_1": {
            "shape": [
              2,
              96
            ],
            "dtype": "i32"
          },
          "d0_arg2_1": {
            "shape": [
              2,
              96,
              96
            ],
            "dtype": "bool"
          },
          "d0_arg3_1": {
            "shape": [
              2,
              96
            ],
            "dtype": "i32"
          },
          "submod_d0_c0": {
            "shape": [
              2,
              96,
              2
            ],
            "dtype": "f32"
          }
        },
        "supertasks": {
          "0": {
            "kind": "input",
            "inputs": [],
            "outputs": [
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg2_1",
              "d0_arg3_1"
            ]
          },
          "1": {
            "kind": "output",
            "inputs": [
              "submod_d0_c0"
            ],
            "outputs": []
          },
          "2": {
            "kind": "edf",
            "inputs": [
              "d0_arg2_1",
              "d0_arg0_1",
              "d0_arg1_1",
              "d0_arg3_1"
            ],
            "outputs": [
              "submod_d0_c0"
            ],
            "device": "0",
            "data": null,
            "data_blob": "97bb3cab5f2f7f5f4640c04cbf3b6ee0"
          }
        },
        "metadata": {
          "tensors": {
            "inputs": {
              "input_ids": {
                "shape": [
                  2,
                  96
                ],
                "dtype": "i32",
                "idx": 0
              },
              "token_type_ids": {
                "shape": [
                  2,
                  96
                ],
                "dtype": "i32",
                "idx": 1
              },
              "attention_mask": {
                "shape": [
                  2,
                  96,
                  96
                ],
                "dtype": "bool",
                "idx": 2
              },
              "position_ids": {
                "shape": [
                  2,
                  96
                ],
                "dtype": "i32",
                "idx": 3
              }
            },
            "outputs": {
              "logits": {
                "shape": [
                  2,
                  96,
                  2
                ],
                "dtype": "f32",
                "idx": 0
              }
            }
          },
          "tensor_slices": {
            "inputs": {
              "d0_arg0_1": {
                "placements": [
                  [
                    0,
                    2
                  ],
                  [
                    0,
                    96
                  ]
                ],
                "origin": "input_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg1_1": {
                "placements": [
                  [
                    0,
                    2
                  ],
                  [
                    0,
                    96
                  ]
                ],
                "origin": "token_type_ids",
                "dtype": "i32",
                "device": "0"
              },
              "d0_arg2_1": {
                "placements": [
                  [
                    0,
                    2
                  ],
                  [
                    0,
                    96
                  ],
                  [
                    0,
                    96
                  ]
                ],
                "origin": "attention_mask",
                "dtype": "bool",
                "device": "0"
              },
              "d0_arg3_1": {
                "placements": [
                  [
                    0,
                    2
                  ],
                  [
                    0,
                    96
                  ]
                ],
                "origin": "position_ids",
                "dtype": "i32",
                "device": "0"
              }
            },
            "outputs": {
              "submod_d0_c0": {
                "placements": [
                  [
                    0,
                    2
                  ],
                  [
                    0,
                    96
                  ],
                  [
                    0,
                    2
                  ]
                ],
                "origin": "logits",
                "dtype": "f32",
                "device": "0"
              }
            }
          }
        },
        "blobs": {
          "97bb3cab5f2f7f5f4640c04cbf3b6ee0": null
        },
        "param_files": {
          "0": {
            "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
            "format": "safetensors"
          }
        },
        "device_constraints": [],
        "version": "0.1.0"
      }
    ],
    "pipeline_metadata_list": [
      {
        "output_logits_size": null
      },
      {
        "output_logits_size": null
      },
      {
        "output_logits_size": null
      },
      {
        "output_logits_size": null
      },
      {
        "output_logits_size": null
      },
      {
        "output_logits_size": null
      }
    ],
    "max_prompt_len": null
  },
  "speculative_model": null,
  "version": {
    "major": 2,
    "minor": 0
  },
  "prefill_chunk_size": null
}