hyunsikc's picture
Add files using upload-large-folder tool
5e8e2e7 verified
raw
history blame
38.7 kB
{
"metadata": {
"artifact_id": "d86d900b-d7f7-4838-9727-35ca1b0d4ec4",
"name": "mlperf-bert",
"timestamp": 1745456449,
"furiosa_llm_version": "249c6f1",
"furiosa_compiler_version": "b504d5d48"
},
"model": {
"generator_config": {
"position_id_pad": 1,
"buckets": [
{
"batch_size": 1,
"attention_size": 384,
"kv_cache_size": 0
},
{
"batch_size": 1,
"attention_size": 320,
"kv_cache_size": 0
},
{
"batch_size": 1,
"attention_size": 192,
"kv_cache_size": 0
},
{
"batch_size": 1,
"attention_size": 128,
"kv_cache_size": 0
},
{
"batch_size": 1,
"attention_size": 160,
"kv_cache_size": 0
},
{
"batch_size": 2,
"attention_size": 96,
"kv_cache_size": 0
}
],
"model_qname": "furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering",
"paged_attention_config": null,
"packing_type": "IDENTITY",
"kv_cache_sharing_across_beams_config": null,
"num_speculative_tokens": null,
"unpadded_vocab_size": null
},
"hf_config": {
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "float32",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": true,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": [
"BertForQuestionAnswering"
],
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": null,
"pad_token_id": 0,
"eos_token_id": null,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "furiosa-ai/mlperf-bert-large",
"_attn_implementation_autoset": false,
"transformers_version": "4.48.1",
"model_type": "bert",
"vocab_size": 30522,
"hidden_size": 1024,
"num_hidden_layers": 24,
"num_attention_heads": 16,
"hidden_act": "rngd_gelu",
"intermediate_size": 4096,
"hidden_dropout_prob": 0.1,
"attention_probs_dropout_prob": 0.1,
"max_position_embeddings": 512,
"type_vocab_size": 2,
"initializer_range": 0.02,
"layer_norm_eps": 1e-12,
"position_embedding_type": "absolute",
"use_cache": true,
"classifier_dropout": null
},
"model_metadata": {
"pretrained_id": "furiosa-ai/mlperf-bert-large",
"task_type": "question-answering",
"llm_config": {
"optimization_config": {
"attention_type": "VANILLA",
"optimize_rope": false,
"optimize_packed": false,
"decompose_layernorm": false,
"optimize_furiosa": false,
"use_unsplit_packed": true,
"compact_causal_mask": false,
"use_rngd_gelu": true,
"causal_mask_free_decoding": false,
"kv_cache_sharing_across_beams": false,
"inbound_beamsearch_softmax": false,
"calculate_logit_only_for_last_token": false,
"optimized_for_speculative_decoding": false
},
"quantization_config": {
"weight": "int8",
"activation": "int8",
"kv_cache": "int8",
"use_mcp": true
}
},
"hf_configs": {},
"model_weight_path": null,
"trust_remote_code": null,
"allow_bfloat16_cast_with_mcp": true,
"auto_bfloat16_cast": null
},
"model_rewriting_config": {
"do_decompositions_for_model_rewrite": false,
"use_blockwise_compile": true,
"embedding_layer_as_single_block": false,
"num_blocks_per_supertask": 24,
"embed_all_constants_into_graph": true,
"optimize_logit_shape": true
},
"parallel_config": {
"tensor_parallel_size": 1,
"pipeline_parallel_size": 1
},
"pipelines": [
{
"name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn384",
"devices": {
"0": "npu:0:0"
},
"tensors": {
"d0_arg0_1": {
"shape": [
1,
384
],
"dtype": "i32"
},
"d0_arg1_1": {
"shape": [
1,
384
],
"dtype": "i32"
},
"d0_arg2_1": {
"shape": [
1,
384,
384
],
"dtype": "bool"
},
"d0_arg3_1": {
"shape": [
1,
384
],
"dtype": "i32"
},
"submod_d0_c0": {
"shape": [
1,
384,
2
],
"dtype": "f32"
}
},
"supertasks": {
"0": {
"kind": "input",
"inputs": [],
"outputs": [
"d0_arg0_1",
"d0_arg1_1",
"d0_arg2_1",
"d0_arg3_1"
]
},
"1": {
"kind": "output",
"inputs": [
"submod_d0_c0"
],
"outputs": []
},
"2": {
"kind": "edf",
"inputs": [
"d0_arg2_1",
"d0_arg0_1",
"d0_arg1_1",
"d0_arg3_1"
],
"outputs": [
"submod_d0_c0"
],
"device": "0",
"data": null,
"data_blob": "92713480ca8937ba5a8dadead5278d92"
}
},
"metadata": {
"tensors": {
"inputs": {
"input_ids": {
"shape": [
1,
384
],
"dtype": "i32",
"idx": 0
},
"token_type_ids": {
"shape": [
1,
384
],
"dtype": "i32",
"idx": 1
},
"attention_mask": {
"shape": [
1,
384,
384
],
"dtype": "bool",
"idx": 2
},
"position_ids": {
"shape": [
1,
384
],
"dtype": "i32",
"idx": 3
}
},
"outputs": {
"logits": {
"shape": [
1,
384,
2
],
"dtype": "f32",
"idx": 0
}
}
},
"tensor_slices": {
"inputs": {
"d0_arg0_1": {
"placements": [
[
0,
1
],
[
0,
384
]
],
"origin": "input_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg1_1": {
"placements": [
[
0,
1
],
[
0,
384
]
],
"origin": "token_type_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg2_1": {
"placements": [
[
0,
1
],
[
0,
384
],
[
0,
384
]
],
"origin": "attention_mask",
"dtype": "bool",
"device": "0"
},
"d0_arg3_1": {
"placements": [
[
0,
1
],
[
0,
384
]
],
"origin": "position_ids",
"dtype": "i32",
"device": "0"
}
},
"outputs": {
"submod_d0_c0": {
"placements": [
[
0,
1
],
[
0,
384
],
[
0,
2
]
],
"origin": "logits",
"dtype": "f32",
"device": "0"
}
}
}
},
"blobs": {
"92713480ca8937ba5a8dadead5278d92": null
},
"param_files": {
"0": {
"path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
"format": "safetensors"
}
},
"device_constraints": [],
"version": "0.1.0"
},
{
"name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn320",
"devices": {
"0": "npu:0:0"
},
"tensors": {
"d0_arg0_1": {
"shape": [
1,
320
],
"dtype": "i32"
},
"d0_arg1_1": {
"shape": [
1,
320
],
"dtype": "i32"
},
"d0_arg2_1": {
"shape": [
1,
320,
320
],
"dtype": "bool"
},
"d0_arg3_1": {
"shape": [
1,
320
],
"dtype": "i32"
},
"submod_d0_c0": {
"shape": [
1,
320,
2
],
"dtype": "f32"
}
},
"supertasks": {
"0": {
"kind": "input",
"inputs": [],
"outputs": [
"d0_arg0_1",
"d0_arg1_1",
"d0_arg2_1",
"d0_arg3_1"
]
},
"1": {
"kind": "output",
"inputs": [
"submod_d0_c0"
],
"outputs": []
},
"2": {
"kind": "edf",
"inputs": [
"d0_arg2_1",
"d0_arg0_1",
"d0_arg1_1",
"d0_arg3_1"
],
"outputs": [
"submod_d0_c0"
],
"device": "0",
"data": null,
"data_blob": "0ff335c7ce60753ee28a910e9fab16f4"
}
},
"metadata": {
"tensors": {
"inputs": {
"input_ids": {
"shape": [
1,
320
],
"dtype": "i32",
"idx": 0
},
"token_type_ids": {
"shape": [
1,
320
],
"dtype": "i32",
"idx": 1
},
"attention_mask": {
"shape": [
1,
320,
320
],
"dtype": "bool",
"idx": 2
},
"position_ids": {
"shape": [
1,
320
],
"dtype": "i32",
"idx": 3
}
},
"outputs": {
"logits": {
"shape": [
1,
320,
2
],
"dtype": "f32",
"idx": 0
}
}
},
"tensor_slices": {
"inputs": {
"d0_arg0_1": {
"placements": [
[
0,
1
],
[
0,
320
]
],
"origin": "input_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg1_1": {
"placements": [
[
0,
1
],
[
0,
320
]
],
"origin": "token_type_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg2_1": {
"placements": [
[
0,
1
],
[
0,
320
],
[
0,
320
]
],
"origin": "attention_mask",
"dtype": "bool",
"device": "0"
},
"d0_arg3_1": {
"placements": [
[
0,
1
],
[
0,
320
]
],
"origin": "position_ids",
"dtype": "i32",
"device": "0"
}
},
"outputs": {
"submod_d0_c0": {
"placements": [
[
0,
1
],
[
0,
320
],
[
0,
2
]
],
"origin": "logits",
"dtype": "f32",
"device": "0"
}
}
}
},
"blobs": {
"0ff335c7ce60753ee28a910e9fab16f4": null
},
"param_files": {
"0": {
"path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
"format": "safetensors"
}
},
"device_constraints": [],
"version": "0.1.0"
},
{
"name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn192",
"devices": {
"0": "npu:0:0"
},
"tensors": {
"d0_arg0_1": {
"shape": [
1,
192
],
"dtype": "i32"
},
"d0_arg1_1": {
"shape": [
1,
192
],
"dtype": "i32"
},
"d0_arg2_1": {
"shape": [
1,
192,
192
],
"dtype": "bool"
},
"d0_arg3_1": {
"shape": [
1,
192
],
"dtype": "i32"
},
"submod_d0_c0": {
"shape": [
1,
192,
2
],
"dtype": "f32"
}
},
"supertasks": {
"0": {
"kind": "input",
"inputs": [],
"outputs": [
"d0_arg0_1",
"d0_arg1_1",
"d0_arg2_1",
"d0_arg3_1"
]
},
"1": {
"kind": "output",
"inputs": [
"submod_d0_c0"
],
"outputs": []
},
"2": {
"kind": "edf",
"inputs": [
"d0_arg2_1",
"d0_arg0_1",
"d0_arg1_1",
"d0_arg3_1"
],
"outputs": [
"submod_d0_c0"
],
"device": "0",
"data": null,
"data_blob": "eb1a559cd1f53e2ede74f1307030a1d0"
}
},
"metadata": {
"tensors": {
"inputs": {
"input_ids": {
"shape": [
1,
192
],
"dtype": "i32",
"idx": 0
},
"token_type_ids": {
"shape": [
1,
192
],
"dtype": "i32",
"idx": 1
},
"attention_mask": {
"shape": [
1,
192,
192
],
"dtype": "bool",
"idx": 2
},
"position_ids": {
"shape": [
1,
192
],
"dtype": "i32",
"idx": 3
}
},
"outputs": {
"logits": {
"shape": [
1,
192,
2
],
"dtype": "f32",
"idx": 0
}
}
},
"tensor_slices": {
"inputs": {
"d0_arg0_1": {
"placements": [
[
0,
1
],
[
0,
192
]
],
"origin": "input_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg1_1": {
"placements": [
[
0,
1
],
[
0,
192
]
],
"origin": "token_type_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg2_1": {
"placements": [
[
0,
1
],
[
0,
192
],
[
0,
192
]
],
"origin": "attention_mask",
"dtype": "bool",
"device": "0"
},
"d0_arg3_1": {
"placements": [
[
0,
1
],
[
0,
192
]
],
"origin": "position_ids",
"dtype": "i32",
"device": "0"
}
},
"outputs": {
"submod_d0_c0": {
"placements": [
[
0,
1
],
[
0,
192
],
[
0,
2
]
],
"origin": "logits",
"dtype": "f32",
"device": "0"
}
}
}
},
"blobs": {
"eb1a559cd1f53e2ede74f1307030a1d0": null
},
"param_files": {
"0": {
"path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
"format": "safetensors"
}
},
"device_constraints": [],
"version": "0.1.0"
},
{
"name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn128",
"devices": {
"0": "npu:0:0"
},
"tensors": {
"d0_arg0_1": {
"shape": [
1,
128
],
"dtype": "i32"
},
"d0_arg1_1": {
"shape": [
1,
128
],
"dtype": "i32"
},
"d0_arg2_1": {
"shape": [
1,
128,
128
],
"dtype": "bool"
},
"d0_arg3_1": {
"shape": [
1,
128
],
"dtype": "i32"
},
"submod_d0_c0": {
"shape": [
1,
128,
2
],
"dtype": "f32"
}
},
"supertasks": {
"0": {
"kind": "input",
"inputs": [],
"outputs": [
"d0_arg0_1",
"d0_arg1_1",
"d0_arg2_1",
"d0_arg3_1"
]
},
"1": {
"kind": "output",
"inputs": [
"submod_d0_c0"
],
"outputs": []
},
"2": {
"kind": "edf",
"inputs": [
"d0_arg2_1",
"d0_arg0_1",
"d0_arg1_1",
"d0_arg3_1"
],
"outputs": [
"submod_d0_c0"
],
"device": "0",
"data": null,
"data_blob": "9ad47915b97d47d3ce069c00271807d6"
}
},
"metadata": {
"tensors": {
"inputs": {
"input_ids": {
"shape": [
1,
128
],
"dtype": "i32",
"idx": 0
},
"token_type_ids": {
"shape": [
1,
128
],
"dtype": "i32",
"idx": 1
},
"attention_mask": {
"shape": [
1,
128,
128
],
"dtype": "bool",
"idx": 2
},
"position_ids": {
"shape": [
1,
128
],
"dtype": "i32",
"idx": 3
}
},
"outputs": {
"logits": {
"shape": [
1,
128,
2
],
"dtype": "f32",
"idx": 0
}
}
},
"tensor_slices": {
"inputs": {
"d0_arg0_1": {
"placements": [
[
0,
1
],
[
0,
128
]
],
"origin": "input_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg1_1": {
"placements": [
[
0,
1
],
[
0,
128
]
],
"origin": "token_type_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg2_1": {
"placements": [
[
0,
1
],
[
0,
128
],
[
0,
128
]
],
"origin": "attention_mask",
"dtype": "bool",
"device": "0"
},
"d0_arg3_1": {
"placements": [
[
0,
1
],
[
0,
128
]
],
"origin": "position_ids",
"dtype": "i32",
"device": "0"
}
},
"outputs": {
"submod_d0_c0": {
"placements": [
[
0,
1
],
[
0,
128
],
[
0,
2
]
],
"origin": "logits",
"dtype": "f32",
"device": "0"
}
}
}
},
"blobs": {
"9ad47915b97d47d3ce069c00271807d6": null
},
"param_files": {
"0": {
"path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
"format": "safetensors"
}
},
"device_constraints": [],
"version": "0.1.0"
},
{
"name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b1-attn160",
"devices": {
"0": "npu:0:0"
},
"tensors": {
"d0_arg0_1": {
"shape": [
1,
160
],
"dtype": "i32"
},
"d0_arg1_1": {
"shape": [
1,
160
],
"dtype": "i32"
},
"d0_arg2_1": {
"shape": [
1,
160,
160
],
"dtype": "bool"
},
"d0_arg3_1": {
"shape": [
1,
160
],
"dtype": "i32"
},
"submod_d0_c0": {
"shape": [
1,
160,
2
],
"dtype": "f32"
}
},
"supertasks": {
"0": {
"kind": "input",
"inputs": [],
"outputs": [
"d0_arg0_1",
"d0_arg1_1",
"d0_arg2_1",
"d0_arg3_1"
]
},
"1": {
"kind": "output",
"inputs": [
"submod_d0_c0"
],
"outputs": []
},
"2": {
"kind": "edf",
"inputs": [
"d0_arg2_1",
"d0_arg0_1",
"d0_arg1_1",
"d0_arg3_1"
],
"outputs": [
"submod_d0_c0"
],
"device": "0",
"data": null,
"data_blob": "8a7b90c915c1cecaf381c70594e3f955"
}
},
"metadata": {
"tensors": {
"inputs": {
"input_ids": {
"shape": [
1,
160
],
"dtype": "i32",
"idx": 0
},
"token_type_ids": {
"shape": [
1,
160
],
"dtype": "i32",
"idx": 1
},
"attention_mask": {
"shape": [
1,
160,
160
],
"dtype": "bool",
"idx": 2
},
"position_ids": {
"shape": [
1,
160
],
"dtype": "i32",
"idx": 3
}
},
"outputs": {
"logits": {
"shape": [
1,
160,
2
],
"dtype": "f32",
"idx": 0
}
}
},
"tensor_slices": {
"inputs": {
"d0_arg0_1": {
"placements": [
[
0,
1
],
[
0,
160
]
],
"origin": "input_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg1_1": {
"placements": [
[
0,
1
],
[
0,
160
]
],
"origin": "token_type_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg2_1": {
"placements": [
[
0,
1
],
[
0,
160
],
[
0,
160
]
],
"origin": "attention_mask",
"dtype": "bool",
"device": "0"
},
"d0_arg3_1": {
"placements": [
[
0,
1
],
[
0,
160
]
],
"origin": "position_ids",
"dtype": "i32",
"device": "0"
}
},
"outputs": {
"submod_d0_c0": {
"placements": [
[
0,
1
],
[
0,
160
],
[
0,
2
]
],
"origin": "logits",
"dtype": "f32",
"device": "0"
}
}
}
},
"blobs": {
"8a7b90c915c1cecaf381c70594e3f955": null
},
"param_files": {
"0": {
"path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
"format": "safetensors"
}
},
"device_constraints": [],
"version": "0.1.0"
},
{
"name": "Quantized_furiosa_llm_models.bert.symbolic.mlperf_submission.BertForQuestionAnswering-kv0-b2-attn96",
"devices": {
"0": "npu:0:0"
},
"tensors": {
"d0_arg0_1": {
"shape": [
2,
96
],
"dtype": "i32"
},
"d0_arg1_1": {
"shape": [
2,
96
],
"dtype": "i32"
},
"d0_arg2_1": {
"shape": [
2,
96,
96
],
"dtype": "bool"
},
"d0_arg3_1": {
"shape": [
2,
96
],
"dtype": "i32"
},
"submod_d0_c0": {
"shape": [
2,
96,
2
],
"dtype": "f32"
}
},
"supertasks": {
"0": {
"kind": "input",
"inputs": [],
"outputs": [
"d0_arg0_1",
"d0_arg1_1",
"d0_arg2_1",
"d0_arg3_1"
]
},
"1": {
"kind": "output",
"inputs": [
"submod_d0_c0"
],
"outputs": []
},
"2": {
"kind": "edf",
"inputs": [
"d0_arg2_1",
"d0_arg0_1",
"d0_arg1_1",
"d0_arg3_1"
],
"outputs": [
"submod_d0_c0"
],
"device": "0",
"data": null,
"data_blob": "97bb3cab5f2f7f5f4640c04cbf3b6ee0"
}
},
"metadata": {
"tensors": {
"inputs": {
"input_ids": {
"shape": [
2,
96
],
"dtype": "i32",
"idx": 0
},
"token_type_ids": {
"shape": [
2,
96
],
"dtype": "i32",
"idx": 1
},
"attention_mask": {
"shape": [
2,
96,
96
],
"dtype": "bool",
"idx": 2
},
"position_ids": {
"shape": [
2,
96
],
"dtype": "i32",
"idx": 3
}
},
"outputs": {
"logits": {
"shape": [
2,
96,
2
],
"dtype": "f32",
"idx": 0
}
}
},
"tensor_slices": {
"inputs": {
"d0_arg0_1": {
"placements": [
[
0,
2
],
[
0,
96
]
],
"origin": "input_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg1_1": {
"placements": [
[
0,
2
],
[
0,
96
]
],
"origin": "token_type_ids",
"dtype": "i32",
"device": "0"
},
"d0_arg2_1": {
"placements": [
[
0,
2
],
[
0,
96
],
[
0,
96
]
],
"origin": "attention_mask",
"dtype": "bool",
"device": "0"
},
"d0_arg3_1": {
"placements": [
[
0,
2
],
[
0,
96
]
],
"origin": "position_ids",
"dtype": "i32",
"device": "0"
}
},
"outputs": {
"submod_d0_c0": {
"placements": [
[
0,
2
],
[
0,
96
],
[
0,
2
]
],
"origin": "logits",
"dtype": "f32",
"device": "0"
}
}
}
},
"blobs": {
"97bb3cab5f2f7f5f4640c04cbf3b6ee0": null
},
"param_files": {
"0": {
"path": "params-mlperf-bert-large-mlperf_submission-24L-W8A8KV8-allow_bfloat16_cast_with_mcp-ba480aa7f239d5bf87fdd9b369ce396c7f516f5fcecf3f40000671d6299f6f5c.safetensors",
"format": "safetensors"
}
},
"device_constraints": [],
"version": "0.1.0"
}
],
"pipeline_metadata_list": [
{
"output_logits_size": null
},
{
"output_logits_size": null
},
{
"output_logits_size": null
},
{
"output_logits_size": null
},
{
"output_logits_size": null
},
{
"output_logits_size": null
}
],
"max_prompt_len": null
},
"speculative_model": null,
"version": {
"major": 2,
"minor": 0
},
"prefill_chunk_size": null
}