| { | |
| "metadata": { | |
| "artifact_id": "d86d900b-d7f7-4838-9727-35ca1b0d4ec4", | |
| "name": "mlperf-bert", | |
| "timestamp": 1745456449, | |
| "furiosa_llm_version": "249c6f1", | |
| "furiosa_compiler_version": "b504d5d48" | |
| }, | |
| "model": { | |
| "generator_config": { | |
| "position_id_pad": 1, | |
| "buckets": [ | |
| { | |
| "batch_size": 1, | |
| "attention_size": 384, | |
| "kv_cache_size": 0 | |
| }, | |
| { | |
| "batch_size": 1, | |
| "attention_size": 320, | |
| "kv_cache_size": 0 | |
| }, | |
| { | |
| "batch_size": 1, | |
| "attention_size": 192, | |
| "kv_cache_size": 0 | |
| }, | |
| { | |
| "batch_size": 1, | |
| "attention_size": 128, | |
| "kv_cache_size": 0 | |
| }, | |
| { | |
| "batch_size": 1, | |
| "attention_size": 160, | |
| "kv_cache_size": 0 | |
| }, | |
| { | |
| "batch_size": 2, | |
| "attention_size": 96, | |
| "kv_cache_size": 0 | |
| } | |
| ], | |
| "model_qname": "furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering", | |
| "paged_attention_config": null, | |
| "packing_type": "IDENTITY", | |
| "kv_cache_sharing_across_beams_config": null, | |
| "num_speculative_tokens": null, | |
| "unpadded_vocab_size": null | |
| }, | |
| "hf_config": { | |
| "return_dict": true, | |
| "output_hidden_states": false, | |
| "output_attentions": false, | |
| "torchscript": false, | |
| "torch_dtype": "float32", | |
| "use_bfloat16": false, | |
| "tf_legacy_loss": false, | |
| "pruned_heads": {}, | |
| "tie_word_embeddings": true, | |
| "chunk_size_feed_forward": 0, | |
| "is_encoder_decoder": false, | |
| "is_decoder": false, | |
| "cross_attention_hidden_size": null, | |
| "add_cross_attention": false, | |
| "tie_encoder_decoder": false, | |
| "max_length": 20, | |
| "min_length": 0, | |
| "do_sample": false, | |
| "early_stopping": false, | |
| "num_beams": 1, | |
| "num_beam_groups": 1, | |
| "diversity_penalty": 0.0, | |
| "temperature": 1.0, | |
| "top_k": 50, | |
| "top_p": 1.0, | |
| "typical_p": 1.0, | |
| "repetition_penalty": 1.0, | |
| "length_penalty": 1.0, | |
| "no_repeat_ngram_size": 0, | |
| "encoder_no_repeat_ngram_size": 0, | |
| "bad_words_ids": null, | |
| "num_return_sequences": 1, | |
| "output_scores": false, | |
| "return_dict_in_generate": false, | |
| "forced_bos_token_id": null, | |
| "forced_eos_token_id": null, | |
| "remove_invalid_values": false, | |
| "exponential_decay_length_penalty": null, | |
| "suppress_tokens": null, | |
| "begin_suppress_tokens": null, | |
| "architectures": [ | |
| "BertForQuestionAnswering" | |
| ], | |
| "finetuning_task": null, | |
| "id2label": { | |
| "0": "LABEL_0", | |
| "1": "LABEL_1" | |
| }, | |
| "label2id": { | |
| "LABEL_0": 0, | |
| "LABEL_1": 1 | |
| }, | |
| "tokenizer_class": null, | |
| "prefix": null, | |
| "bos_token_id": null, | |
| "pad_token_id": 0, | |
| "eos_token_id": null, | |
| "sep_token_id": null, | |
| "decoder_start_token_id": null, | |
| "task_specific_params": null, | |
| "problem_type": null, | |
| "_name_or_path": "furiosa-ai/mlperf-bert-large", | |
| "_attn_implementation_autoset": false, | |
| "transformers_version": "4.48.1", | |
| "model_type": "bert", | |
| "vocab_size": 30522, | |
| "hidden_size": 1024, | |
| "num_hidden_layers": 24, | |
| "num_attention_heads": 16, | |
| "hidden_act": "rngd_gelu", | |
| "intermediate_size": 4096, | |
| "hidden_dropout_prob": 0.1, | |
| "attention_probs_dropout_prob": 0.1, | |
| "max_position_embeddings": 512, | |
| "type_vocab_size": 2, | |
| "initializer_range": 0.02, | |
| "layer_norm_eps": 1e-12, | |
| "position_embedding_type": "absolute", | |
| "use_cache": true, | |
| "classifier_dropout": null | |
| }, | |
| "model_metadata": { | |
| "pretrained_id": "furiosa-ai/mlperf-bert-large", | |
| "task_type": "question-answering", | |
| "llm_config": { | |
| "optimization_config": { | |
| "attention_type": "VANILLA", | |
| "optimize_rope": false, | |
| "optimize_packed": false, | |
| "decompose_layernorm": false, | |
| "optimize_furiosa": false, | |
| "use_unsplit_packed": true, | |
| "compact_causal_mask": false, | |
| "use_rngd_gelu": true, | |
| "causal_mask_free_decoding": false, | |
| "kv_cache_sharing_across_beams": false, | |
| "inbound_beamsearch_softmax": false, | |
| "calculate_logit_only_for_last_token": false, | |
| "optimized_for_speculative_decoding": false | |
| }, | |
| "quantization_config": { | |
| "weight": "int8", | |
| "activation": "int8", | |
| "kv_cache": "int8", | |
| "use_mcp": true | |
| } | |
| }, | |
| "hf_configs": {}, | |
| "model_weight_path": null, | |
| "trust_remote_code": null, | |
| "allow_bfloat16_cast_with_mcp": true, | |
| "auto_bfloat16_cast": null | |
| }, | |
| "model_rewriting_config": { | |
| "do_decompositions_for_model_rewrite": false, | |
| "use_blockwise_compile": true, | |
| "embedding_layer_as_single_block": false, | |
| "num_blocks_per_supertask": 24, | |
| "embed_all_constants_into_graph": true, | |
| "optimize_logit_shape": true | |
| }, | |
| "parallel_config": { | |
| "tensor_parallel_size": 1, | |
| "pipeline_parallel_size": 1 | |
| }, | |
| "pipelines": [ | |
| { | |
| "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn384", | |
| "devices": { | |
| "0": "npu:0:0" | |
| }, | |
| "tensors": { | |
| "d0_arg0_1": { | |
| "shape": [ | |
| 1, | |
| 384 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg1_1": { | |
| "shape": [ | |
| 1, | |
| 384 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg2_1": { | |
| "shape": [ | |
| 1, | |
| 384, | |
| 384 | |
| ], | |
| "dtype": "bool" | |
| }, | |
| "d0_arg3_1": { | |
| "shape": [ | |
| 1, | |
| 384 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "submod_d0_c0": { | |
| "shape": [ | |
| 1, | |
| 384, | |
| 2 | |
| ], | |
| "dtype": "f32" | |
| } | |
| }, | |
| "supertasks": { | |
| "0": { | |
| "kind": "input", | |
| "inputs": [], | |
| "outputs": [ | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg2_1", | |
| "d0_arg3_1" | |
| ] | |
| }, | |
| "1": { | |
| "kind": "output", | |
| "inputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "outputs": [] | |
| }, | |
| "2": { | |
| "kind": "edf", | |
| "inputs": [ | |
| "d0_arg2_1", | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg3_1" | |
| ], | |
| "outputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "device": "0", | |
| "data": null, | |
| "data_blob": "92713480ca8937ba5a8dadead5278d92" | |
| } | |
| }, | |
| "metadata": { | |
| "tensors": { | |
| "inputs": { | |
| "input_ids": { | |
| "shape": [ | |
| 1, | |
| 384 | |
| ], | |
| "dtype": "i32", | |
| "idx": 0 | |
| }, | |
| "token_type_ids": { | |
| "shape": [ | |
| 1, | |
| 384 | |
| ], | |
| "dtype": "i32", | |
| "idx": 1 | |
| }, | |
| "attention_mask": { | |
| "shape": [ | |
| 1, | |
| 384, | |
| 384 | |
| ], | |
| "dtype": "bool", | |
| "idx": 2 | |
| }, | |
| "position_ids": { | |
| "shape": [ | |
| 1, | |
| 384 | |
| ], | |
| "dtype": "i32", | |
| "idx": 3 | |
| } | |
| }, | |
| "outputs": { | |
| "logits": { | |
| "shape": [ | |
| 1, | |
| 384, | |
| 2 | |
| ], | |
| "dtype": "f32", | |
| "idx": 0 | |
| } | |
| } | |
| }, | |
| "tensor_slices": { | |
| "inputs": { | |
| "d0_arg0_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 384 | |
| ] | |
| ], | |
| "origin": "input_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg1_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 384 | |
| ] | |
| ], | |
| "origin": "token_type_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg2_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 384 | |
| ], | |
| [ | |
| 0, | |
| 384 | |
| ] | |
| ], | |
| "origin": "attention_mask", | |
| "dtype": "bool", | |
| "device": "0" | |
| }, | |
| "d0_arg3_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 384 | |
| ] | |
| ], | |
| "origin": "position_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| } | |
| }, | |
| "outputs": { | |
| "submod_d0_c0": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 384 | |
| ], | |
| [ | |
| 0, | |
| 2 | |
| ] | |
| ], | |
| "origin": "logits", | |
| "dtype": "f32", | |
| "device": "0" | |
| } | |
| } | |
| } | |
| }, | |
| "blobs": { | |
| "92713480ca8937ba5a8dadead5278d92": null | |
| }, | |
| "param_files": { | |
| "0": { | |
| "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", | |
| "format": "safetensors" | |
| } | |
| }, | |
| "device_constraints": [], | |
| "version": "0.1.0" | |
| }, | |
| { | |
| "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn320", | |
| "devices": { | |
| "0": "npu:0:0" | |
| }, | |
| "tensors": { | |
| "d0_arg0_1": { | |
| "shape": [ | |
| 1, | |
| 320 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg1_1": { | |
| "shape": [ | |
| 1, | |
| 320 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg2_1": { | |
| "shape": [ | |
| 1, | |
| 320, | |
| 320 | |
| ], | |
| "dtype": "bool" | |
| }, | |
| "d0_arg3_1": { | |
| "shape": [ | |
| 1, | |
| 320 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "submod_d0_c0": { | |
| "shape": [ | |
| 1, | |
| 320, | |
| 2 | |
| ], | |
| "dtype": "f32" | |
| } | |
| }, | |
| "supertasks": { | |
| "0": { | |
| "kind": "input", | |
| "inputs": [], | |
| "outputs": [ | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg2_1", | |
| "d0_arg3_1" | |
| ] | |
| }, | |
| "1": { | |
| "kind": "output", | |
| "inputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "outputs": [] | |
| }, | |
| "2": { | |
| "kind": "edf", | |
| "inputs": [ | |
| "d0_arg2_1", | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg3_1" | |
| ], | |
| "outputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "device": "0", | |
| "data": null, | |
| "data_blob": "0ff335c7ce60753ee28a910e9fab16f4" | |
| } | |
| }, | |
| "metadata": { | |
| "tensors": { | |
| "inputs": { | |
| "input_ids": { | |
| "shape": [ | |
| 1, | |
| 320 | |
| ], | |
| "dtype": "i32", | |
| "idx": 0 | |
| }, | |
| "token_type_ids": { | |
| "shape": [ | |
| 1, | |
| 320 | |
| ], | |
| "dtype": "i32", | |
| "idx": 1 | |
| }, | |
| "attention_mask": { | |
| "shape": [ | |
| 1, | |
| 320, | |
| 320 | |
| ], | |
| "dtype": "bool", | |
| "idx": 2 | |
| }, | |
| "position_ids": { | |
| "shape": [ | |
| 1, | |
| 320 | |
| ], | |
| "dtype": "i32", | |
| "idx": 3 | |
| } | |
| }, | |
| "outputs": { | |
| "logits": { | |
| "shape": [ | |
| 1, | |
| 320, | |
| 2 | |
| ], | |
| "dtype": "f32", | |
| "idx": 0 | |
| } | |
| } | |
| }, | |
| "tensor_slices": { | |
| "inputs": { | |
| "d0_arg0_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 320 | |
| ] | |
| ], | |
| "origin": "input_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg1_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 320 | |
| ] | |
| ], | |
| "origin": "token_type_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg2_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 320 | |
| ], | |
| [ | |
| 0, | |
| 320 | |
| ] | |
| ], | |
| "origin": "attention_mask", | |
| "dtype": "bool", | |
| "device": "0" | |
| }, | |
| "d0_arg3_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 320 | |
| ] | |
| ], | |
| "origin": "position_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| } | |
| }, | |
| "outputs": { | |
| "submod_d0_c0": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 320 | |
| ], | |
| [ | |
| 0, | |
| 2 | |
| ] | |
| ], | |
| "origin": "logits", | |
| "dtype": "f32", | |
| "device": "0" | |
| } | |
| } | |
| } | |
| }, | |
| "blobs": { | |
| "0ff335c7ce60753ee28a910e9fab16f4": null | |
| }, | |
| "param_files": { | |
| "0": { | |
| "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", | |
| "format": "safetensors" | |
| } | |
| }, | |
| "device_constraints": [], | |
| "version": "0.1.0" | |
| }, | |
| { | |
| "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn192", | |
| "devices": { | |
| "0": "npu:0:0" | |
| }, | |
| "tensors": { | |
| "d0_arg0_1": { | |
| "shape": [ | |
| 1, | |
| 192 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg1_1": { | |
| "shape": [ | |
| 1, | |
| 192 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg2_1": { | |
| "shape": [ | |
| 1, | |
| 192, | |
| 192 | |
| ], | |
| "dtype": "bool" | |
| }, | |
| "d0_arg3_1": { | |
| "shape": [ | |
| 1, | |
| 192 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "submod_d0_c0": { | |
| "shape": [ | |
| 1, | |
| 192, | |
| 2 | |
| ], | |
| "dtype": "f32" | |
| } | |
| }, | |
| "supertasks": { | |
| "0": { | |
| "kind": "input", | |
| "inputs": [], | |
| "outputs": [ | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg2_1", | |
| "d0_arg3_1" | |
| ] | |
| }, | |
| "1": { | |
| "kind": "output", | |
| "inputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "outputs": [] | |
| }, | |
| "2": { | |
| "kind": "edf", | |
| "inputs": [ | |
| "d0_arg2_1", | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg3_1" | |
| ], | |
| "outputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "device": "0", | |
| "data": null, | |
| "data_blob": "eb1a559cd1f53e2ede74f1307030a1d0" | |
| } | |
| }, | |
| "metadata": { | |
| "tensors": { | |
| "inputs": { | |
| "input_ids": { | |
| "shape": [ | |
| 1, | |
| 192 | |
| ], | |
| "dtype": "i32", | |
| "idx": 0 | |
| }, | |
| "token_type_ids": { | |
| "shape": [ | |
| 1, | |
| 192 | |
| ], | |
| "dtype": "i32", | |
| "idx": 1 | |
| }, | |
| "attention_mask": { | |
| "shape": [ | |
| 1, | |
| 192, | |
| 192 | |
| ], | |
| "dtype": "bool", | |
| "idx": 2 | |
| }, | |
| "position_ids": { | |
| "shape": [ | |
| 1, | |
| 192 | |
| ], | |
| "dtype": "i32", | |
| "idx": 3 | |
| } | |
| }, | |
| "outputs": { | |
| "logits": { | |
| "shape": [ | |
| 1, | |
| 192, | |
| 2 | |
| ], | |
| "dtype": "f32", | |
| "idx": 0 | |
| } | |
| } | |
| }, | |
| "tensor_slices": { | |
| "inputs": { | |
| "d0_arg0_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 192 | |
| ] | |
| ], | |
| "origin": "input_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg1_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 192 | |
| ] | |
| ], | |
| "origin": "token_type_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg2_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 192 | |
| ], | |
| [ | |
| 0, | |
| 192 | |
| ] | |
| ], | |
| "origin": "attention_mask", | |
| "dtype": "bool", | |
| "device": "0" | |
| }, | |
| "d0_arg3_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 192 | |
| ] | |
| ], | |
| "origin": "position_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| } | |
| }, | |
| "outputs": { | |
| "submod_d0_c0": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 192 | |
| ], | |
| [ | |
| 0, | |
| 2 | |
| ] | |
| ], | |
| "origin": "logits", | |
| "dtype": "f32", | |
| "device": "0" | |
| } | |
| } | |
| } | |
| }, | |
| "blobs": { | |
| "eb1a559cd1f53e2ede74f1307030a1d0": null | |
| }, | |
| "param_files": { | |
| "0": { | |
| "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", | |
| "format": "safetensors" | |
| } | |
| }, | |
| "device_constraints": [], | |
| "version": "0.1.0" | |
| }, | |
| { | |
| "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn128", | |
| "devices": { | |
| "0": "npu:0:0" | |
| }, | |
| "tensors": { | |
| "d0_arg0_1": { | |
| "shape": [ | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg1_1": { | |
| "shape": [ | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg2_1": { | |
| "shape": [ | |
| 1, | |
| 128, | |
| 128 | |
| ], | |
| "dtype": "bool" | |
| }, | |
| "d0_arg3_1": { | |
| "shape": [ | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "submod_d0_c0": { | |
| "shape": [ | |
| 1, | |
| 128, | |
| 2 | |
| ], | |
| "dtype": "f32" | |
| } | |
| }, | |
| "supertasks": { | |
| "0": { | |
| "kind": "input", | |
| "inputs": [], | |
| "outputs": [ | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg2_1", | |
| "d0_arg3_1" | |
| ] | |
| }, | |
| "1": { | |
| "kind": "output", | |
| "inputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "outputs": [] | |
| }, | |
| "2": { | |
| "kind": "edf", | |
| "inputs": [ | |
| "d0_arg2_1", | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg3_1" | |
| ], | |
| "outputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "device": "0", | |
| "data": null, | |
| "data_blob": "9ad47915b97d47d3ce069c00271807d6" | |
| } | |
| }, | |
| "metadata": { | |
| "tensors": { | |
| "inputs": { | |
| "input_ids": { | |
| "shape": [ | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "i32", | |
| "idx": 0 | |
| }, | |
| "token_type_ids": { | |
| "shape": [ | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "i32", | |
| "idx": 1 | |
| }, | |
| "attention_mask": { | |
| "shape": [ | |
| 1, | |
| 128, | |
| 128 | |
| ], | |
| "dtype": "bool", | |
| "idx": 2 | |
| }, | |
| "position_ids": { | |
| "shape": [ | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "i32", | |
| "idx": 3 | |
| } | |
| }, | |
| "outputs": { | |
| "logits": { | |
| "shape": [ | |
| 1, | |
| 128, | |
| 2 | |
| ], | |
| "dtype": "f32", | |
| "idx": 0 | |
| } | |
| } | |
| }, | |
| "tensor_slices": { | |
| "inputs": { | |
| "d0_arg0_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 128 | |
| ] | |
| ], | |
| "origin": "input_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg1_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 128 | |
| ] | |
| ], | |
| "origin": "token_type_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg2_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 128 | |
| ], | |
| [ | |
| 0, | |
| 128 | |
| ] | |
| ], | |
| "origin": "attention_mask", | |
| "dtype": "bool", | |
| "device": "0" | |
| }, | |
| "d0_arg3_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 128 | |
| ] | |
| ], | |
| "origin": "position_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| } | |
| }, | |
| "outputs": { | |
| "submod_d0_c0": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 128 | |
| ], | |
| [ | |
| 0, | |
| 2 | |
| ] | |
| ], | |
| "origin": "logits", | |
| "dtype": "f32", | |
| "device": "0" | |
| } | |
| } | |
| } | |
| }, | |
| "blobs": { | |
| "9ad47915b97d47d3ce069c00271807d6": null | |
| }, | |
| "param_files": { | |
| "0": { | |
| "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", | |
| "format": "safetensors" | |
| } | |
| }, | |
| "device_constraints": [], | |
| "version": "0.1.0" | |
| }, | |
| { | |
| "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn160", | |
| "devices": { | |
| "0": "npu:0:0" | |
| }, | |
| "tensors": { | |
| "d0_arg0_1": { | |
| "shape": [ | |
| 1, | |
| 160 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg1_1": { | |
| "shape": [ | |
| 1, | |
| 160 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg2_1": { | |
| "shape": [ | |
| 1, | |
| 160, | |
| 160 | |
| ], | |
| "dtype": "bool" | |
| }, | |
| "d0_arg3_1": { | |
| "shape": [ | |
| 1, | |
| 160 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "submod_d0_c0": { | |
| "shape": [ | |
| 1, | |
| 160, | |
| 2 | |
| ], | |
| "dtype": "f32" | |
| } | |
| }, | |
| "supertasks": { | |
| "0": { | |
| "kind": "input", | |
| "inputs": [], | |
| "outputs": [ | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg2_1", | |
| "d0_arg3_1" | |
| ] | |
| }, | |
| "1": { | |
| "kind": "output", | |
| "inputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "outputs": [] | |
| }, | |
| "2": { | |
| "kind": "edf", | |
| "inputs": [ | |
| "d0_arg2_1", | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg3_1" | |
| ], | |
| "outputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "device": "0", | |
| "data": null, | |
| "data_blob": "8a7b90c915c1cecaf381c70594e3f955" | |
| } | |
| }, | |
| "metadata": { | |
| "tensors": { | |
| "inputs": { | |
| "input_ids": { | |
| "shape": [ | |
| 1, | |
| 160 | |
| ], | |
| "dtype": "i32", | |
| "idx": 0 | |
| }, | |
| "token_type_ids": { | |
| "shape": [ | |
| 1, | |
| 160 | |
| ], | |
| "dtype": "i32", | |
| "idx": 1 | |
| }, | |
| "attention_mask": { | |
| "shape": [ | |
| 1, | |
| 160, | |
| 160 | |
| ], | |
| "dtype": "bool", | |
| "idx": 2 | |
| }, | |
| "position_ids": { | |
| "shape": [ | |
| 1, | |
| 160 | |
| ], | |
| "dtype": "i32", | |
| "idx": 3 | |
| } | |
| }, | |
| "outputs": { | |
| "logits": { | |
| "shape": [ | |
| 1, | |
| 160, | |
| 2 | |
| ], | |
| "dtype": "f32", | |
| "idx": 0 | |
| } | |
| } | |
| }, | |
| "tensor_slices": { | |
| "inputs": { | |
| "d0_arg0_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 160 | |
| ] | |
| ], | |
| "origin": "input_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg1_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 160 | |
| ] | |
| ], | |
| "origin": "token_type_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg2_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 160 | |
| ], | |
| [ | |
| 0, | |
| 160 | |
| ] | |
| ], | |
| "origin": "attention_mask", | |
| "dtype": "bool", | |
| "device": "0" | |
| }, | |
| "d0_arg3_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 160 | |
| ] | |
| ], | |
| "origin": "position_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| } | |
| }, | |
| "outputs": { | |
| "submod_d0_c0": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 160 | |
| ], | |
| [ | |
| 0, | |
| 2 | |
| ] | |
| ], | |
| "origin": "logits", | |
| "dtype": "f32", | |
| "device": "0" | |
| } | |
| } | |
| } | |
| }, | |
| "blobs": { | |
| "8a7b90c915c1cecaf381c70594e3f955": null | |
| }, | |
| "param_files": { | |
| "0": { | |
| "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", | |
| "format": "safetensors" | |
| } | |
| }, | |
| "device_constraints": [], | |
| "version": "0.1.0" | |
| }, | |
| { | |
| "name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b2-attn96", | |
| "devices": { | |
| "0": "npu:0:0" | |
| }, | |
| "tensors": { | |
| "d0_arg0_1": { | |
| "shape": [ | |
| 2, | |
| 96 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg1_1": { | |
| "shape": [ | |
| 2, | |
| 96 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "d0_arg2_1": { | |
| "shape": [ | |
| 2, | |
| 96, | |
| 96 | |
| ], | |
| "dtype": "bool" | |
| }, | |
| "d0_arg3_1": { | |
| "shape": [ | |
| 2, | |
| 96 | |
| ], | |
| "dtype": "i32" | |
| }, | |
| "submod_d0_c0": { | |
| "shape": [ | |
| 2, | |
| 96, | |
| 2 | |
| ], | |
| "dtype": "f32" | |
| } | |
| }, | |
| "supertasks": { | |
| "0": { | |
| "kind": "input", | |
| "inputs": [], | |
| "outputs": [ | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg2_1", | |
| "d0_arg3_1" | |
| ] | |
| }, | |
| "1": { | |
| "kind": "output", | |
| "inputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "outputs": [] | |
| }, | |
| "2": { | |
| "kind": "edf", | |
| "inputs": [ | |
| "d0_arg2_1", | |
| "d0_arg0_1", | |
| "d0_arg1_1", | |
| "d0_arg3_1" | |
| ], | |
| "outputs": [ | |
| "submod_d0_c0" | |
| ], | |
| "device": "0", | |
| "data": null, | |
| "data_blob": "97bb3cab5f2f7f5f4640c04cbf3b6ee0" | |
| } | |
| }, | |
| "metadata": { | |
| "tensors": { | |
| "inputs": { | |
| "input_ids": { | |
| "shape": [ | |
| 2, | |
| 96 | |
| ], | |
| "dtype": "i32", | |
| "idx": 0 | |
| }, | |
| "token_type_ids": { | |
| "shape": [ | |
| 2, | |
| 96 | |
| ], | |
| "dtype": "i32", | |
| "idx": 1 | |
| }, | |
| "attention_mask": { | |
| "shape": [ | |
| 2, | |
| 96, | |
| 96 | |
| ], | |
| "dtype": "bool", | |
| "idx": 2 | |
| }, | |
| "position_ids": { | |
| "shape": [ | |
| 2, | |
| 96 | |
| ], | |
| "dtype": "i32", | |
| "idx": 3 | |
| } | |
| }, | |
| "outputs": { | |
| "logits": { | |
| "shape": [ | |
| 2, | |
| 96, | |
| 2 | |
| ], | |
| "dtype": "f32", | |
| "idx": 0 | |
| } | |
| } | |
| }, | |
| "tensor_slices": { | |
| "inputs": { | |
| "d0_arg0_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 96 | |
| ] | |
| ], | |
| "origin": "input_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg1_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 96 | |
| ] | |
| ], | |
| "origin": "token_type_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| }, | |
| "d0_arg2_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 96 | |
| ], | |
| [ | |
| 0, | |
| 96 | |
| ] | |
| ], | |
| "origin": "attention_mask", | |
| "dtype": "bool", | |
| "device": "0" | |
| }, | |
| "d0_arg3_1": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 96 | |
| ] | |
| ], | |
| "origin": "position_ids", | |
| "dtype": "i32", | |
| "device": "0" | |
| } | |
| }, | |
| "outputs": { | |
| "submod_d0_c0": { | |
| "placements": [ | |
| [ | |
| 0, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 96 | |
| ], | |
| [ | |
| 0, | |
| 2 | |
| ] | |
| ], | |
| "origin": "logits", | |
| "dtype": "f32", | |
| "device": "0" | |
| } | |
| } | |
| } | |
| }, | |
| "blobs": { | |
| "97bb3cab5f2f7f5f4640c04cbf3b6ee0": null | |
| }, | |
| "param_files": { | |
| "0": { | |
| "path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors", | |
| "format": "safetensors" | |
| } | |
| }, | |
| "device_constraints": [], | |
| "version": "0.1.0" | |
| } | |
| ], | |
| "pipeline_metadata_list": [ | |
| { | |
| "output_logits_size": null | |
| }, | |
| { | |
| "output_logits_size": null | |
| }, | |
| { | |
| "output_logits_size": null | |
| }, | |
| { | |
| "output_logits_size": null | |
| }, | |
| { | |
| "output_logits_size": null | |
| }, | |
| { | |
| "output_logits_size": null | |
| } | |
| ], | |
| "max_prompt_len": null | |
| }, | |
| "speculative_model": null, | |
| "version": { | |
| "major": 2, | |
| "minor": 0 | |
| }, | |
| "prefill_chunk_size": null | |
| } |