checkpoint added
Browse files- i/checkpoint-586/config.json +45 -0
- i/checkpoint-586/optimizer.pt +3 -0
- i/checkpoint-586/pytorch_model.bin +3 -0
- i/checkpoint-586/rng_state.pth +3 -0
- i/checkpoint-586/scheduler.pt +3 -0
- i/checkpoint-586/special_tokens_map.json +7 -0
- i/checkpoint-586/tokenizer.json +0 -0
- i/checkpoint-586/tokenizer_config.json +13 -0
- i/checkpoint-586/trainer_state.json +40 -0
- i/checkpoint-586/training_args.bin +3 -0
- i/checkpoint-586/vocab.txt +0 -0
- i_context/checkpoint-586/config.json +45 -0
- i_context/checkpoint-586/optimizer.pt +3 -0
- i_context/checkpoint-586/pytorch_model.bin +3 -0
- i_context/checkpoint-586/rng_state.pth +3 -0
- i_context/checkpoint-586/scheduler.pt +3 -0
- i_context/checkpoint-586/special_tokens_map.json +7 -0
- i_context/checkpoint-586/tokenizer.json +0 -0
- i_context/checkpoint-586/tokenizer_config.json +13 -0
- i_context/checkpoint-586/trainer_state.json +40 -0
- i_context/checkpoint-586/training_args.bin +3 -0
- i_context/checkpoint-586/vocab.txt +0 -0
- l/checkpoint-586/config.json +45 -0
- l/checkpoint-586/optimizer.pt +3 -0
- l/checkpoint-586/pytorch_model.bin +3 -0
- l/checkpoint-586/rng_state.pth +3 -0
- l/checkpoint-586/scheduler.pt +3 -0
- l/checkpoint-586/special_tokens_map.json +7 -0
- l/checkpoint-586/tokenizer.json +0 -0
- l/checkpoint-586/tokenizer_config.json +13 -0
- l/checkpoint-586/trainer_state.json +40 -0
- l/checkpoint-586/training_args.bin +3 -0
- l/checkpoint-586/vocab.txt +0 -0
- l_context/checkpoint-586/config.json +45 -0
- l_context/checkpoint-586/optimizer.pt +3 -0
- l_context/checkpoint-586/pytorch_model.bin +3 -0
- l_context/checkpoint-586/rng_state.pth +3 -0
- l_context/checkpoint-586/scheduler.pt +3 -0
- l_context/checkpoint-586/special_tokens_map.json +7 -0
- l_context/checkpoint-586/tokenizer.json +0 -0
- l_context/checkpoint-586/tokenizer_config.json +13 -0
- l_context/checkpoint-586/trainer_state.json +40 -0
- l_context/checkpoint-586/training_args.bin +3 -0
- l_context/checkpoint-586/vocab.txt +0 -0
i/checkpoint-586/config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "bert-large-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"id2label": {
|
| 14 |
+
"0": "conflict",
|
| 15 |
+
"1": "inference",
|
| 16 |
+
"2": "no_rel",
|
| 17 |
+
"3": "rephrase"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 4096,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"conflict": 0,
|
| 23 |
+
"inference": 1,
|
| 24 |
+
"no_rel": 2,
|
| 25 |
+
"rephrase": 3
|
| 26 |
+
},
|
| 27 |
+
"layer_norm_eps": 1e-12,
|
| 28 |
+
"max_position_embeddings": 512,
|
| 29 |
+
"model_type": "bert",
|
| 30 |
+
"num_attention_heads": 16,
|
| 31 |
+
"num_hidden_layers": 24,
|
| 32 |
+
"pad_token_id": 0,
|
| 33 |
+
"pooler_fc_size": 768,
|
| 34 |
+
"pooler_num_attention_heads": 12,
|
| 35 |
+
"pooler_num_fc_layers": 3,
|
| 36 |
+
"pooler_size_per_head": 128,
|
| 37 |
+
"pooler_type": "first_token_transform",
|
| 38 |
+
"position_embedding_type": "absolute",
|
| 39 |
+
"problem_type": "single_label_classification",
|
| 40 |
+
"torch_dtype": "float32",
|
| 41 |
+
"transformers_version": "4.31.0",
|
| 42 |
+
"type_vocab_size": 2,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 28996
|
| 45 |
+
}
|
i/checkpoint-586/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c071b119f8a8d2def9bc6b38b6705d36d793826fa884507ce442aa400681bd6f
|
| 3 |
+
size 2668897797
|
i/checkpoint-586/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53075d76ecb13d2ffbd1bc6f724603c0ce9547fc44e33ce350a4d7f4a4334f22
|
| 3 |
+
size 1334467889
|
i/checkpoint-586/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
|
| 3 |
+
size 14575
|
i/checkpoint-586/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
|
| 3 |
+
size 627
|
i/checkpoint-586/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
i/checkpoint-586/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
i/checkpoint-586/tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"clean_up_tokenization_spaces": true,
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"model_max_length": 512,
|
| 7 |
+
"pad_token": "[PAD]",
|
| 8 |
+
"sep_token": "[SEP]",
|
| 9 |
+
"strip_accents": null,
|
| 10 |
+
"tokenize_chinese_chars": true,
|
| 11 |
+
"tokenizer_class": "BertTokenizer",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
i/checkpoint-586/trainer_state.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 1.0550591945648193,
|
| 3 |
+
"best_model_checkpoint": "clean_output/bert-large-cased/i/checkpoint-586",
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"global_step": 586,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 1.0,
|
| 12 |
+
"eval_accuracy": 0.5,
|
| 13 |
+
"eval_loss": 1.1290996074676514,
|
| 14 |
+
"eval_runtime": 10.4475,
|
| 15 |
+
"eval_samples_per_second": 223.978,
|
| 16 |
+
"eval_steps_per_second": 28.045,
|
| 17 |
+
"step": 293
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.71,
|
| 21 |
+
"learning_rate": 7.680097680097681e-06,
|
| 22 |
+
"loss": 1.174,
|
| 23 |
+
"step": 500
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 2.0,
|
| 27 |
+
"eval_accuracy": 0.5547008547008547,
|
| 28 |
+
"eval_loss": 1.0550591945648193,
|
| 29 |
+
"eval_runtime": 10.4781,
|
| 30 |
+
"eval_samples_per_second": 223.323,
|
| 31 |
+
"eval_steps_per_second": 27.963,
|
| 32 |
+
"step": 586
|
| 33 |
+
}
|
| 34 |
+
],
|
| 35 |
+
"max_steps": 1758,
|
| 36 |
+
"num_train_epochs": 6,
|
| 37 |
+
"total_flos": 6812609916643200.0,
|
| 38 |
+
"trial_name": null,
|
| 39 |
+
"trial_params": null
|
| 40 |
+
}
|
i/checkpoint-586/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06b07181fa39b72a45b37e999ef96b344aa5d73c1378eb6fbf7bcac9cb1ec357
|
| 3 |
+
size 3963
|
i/checkpoint-586/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
i_context/checkpoint-586/config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "bert-large-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"id2label": {
|
| 14 |
+
"0": "conflict",
|
| 15 |
+
"1": "inference",
|
| 16 |
+
"2": "no_rel",
|
| 17 |
+
"3": "rephrase"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 4096,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"conflict": 0,
|
| 23 |
+
"inference": 1,
|
| 24 |
+
"no_rel": 2,
|
| 25 |
+
"rephrase": 3
|
| 26 |
+
},
|
| 27 |
+
"layer_norm_eps": 1e-12,
|
| 28 |
+
"max_position_embeddings": 512,
|
| 29 |
+
"model_type": "bert",
|
| 30 |
+
"num_attention_heads": 16,
|
| 31 |
+
"num_hidden_layers": 24,
|
| 32 |
+
"pad_token_id": 0,
|
| 33 |
+
"pooler_fc_size": 768,
|
| 34 |
+
"pooler_num_attention_heads": 12,
|
| 35 |
+
"pooler_num_fc_layers": 3,
|
| 36 |
+
"pooler_size_per_head": 128,
|
| 37 |
+
"pooler_type": "first_token_transform",
|
| 38 |
+
"position_embedding_type": "absolute",
|
| 39 |
+
"problem_type": "single_label_classification",
|
| 40 |
+
"torch_dtype": "float32",
|
| 41 |
+
"transformers_version": "4.31.0",
|
| 42 |
+
"type_vocab_size": 2,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 28996
|
| 45 |
+
}
|
i_context/checkpoint-586/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:559ef893eee26ebfaf78999f62491999f95298cc468c8a8d40cc967d5a027384
|
| 3 |
+
size 2668897797
|
i_context/checkpoint-586/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d5c5a5fd576f61fff46221d942d38c6096f39c2362998e3e182fb3d5f14b6c7
|
| 3 |
+
size 1334467889
|
i_context/checkpoint-586/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
|
| 3 |
+
size 14575
|
i_context/checkpoint-586/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
|
| 3 |
+
size 627
|
i_context/checkpoint-586/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
i_context/checkpoint-586/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
i_context/checkpoint-586/tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"clean_up_tokenization_spaces": true,
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"model_max_length": 512,
|
| 7 |
+
"pad_token": "[PAD]",
|
| 8 |
+
"sep_token": "[SEP]",
|
| 9 |
+
"strip_accents": null,
|
| 10 |
+
"tokenize_chinese_chars": true,
|
| 11 |
+
"tokenizer_class": "BertTokenizer",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
i_context/checkpoint-586/trainer_state.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 1.0713884830474854,
|
| 3 |
+
"best_model_checkpoint": "clean_output/bert-large-cased/i_context/checkpoint-586",
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"global_step": 586,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 1.0,
|
| 12 |
+
"eval_accuracy": 0.505982905982906,
|
| 13 |
+
"eval_loss": 1.1140286922454834,
|
| 14 |
+
"eval_runtime": 10.4473,
|
| 15 |
+
"eval_samples_per_second": 223.981,
|
| 16 |
+
"eval_steps_per_second": 28.045,
|
| 17 |
+
"step": 293
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.71,
|
| 21 |
+
"learning_rate": 7.680097680097681e-06,
|
| 22 |
+
"loss": 1.1604,
|
| 23 |
+
"step": 500
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 2.0,
|
| 27 |
+
"eval_accuracy": 0.5525641025641026,
|
| 28 |
+
"eval_loss": 1.0713884830474854,
|
| 29 |
+
"eval_runtime": 10.4432,
|
| 30 |
+
"eval_samples_per_second": 224.069,
|
| 31 |
+
"eval_steps_per_second": 28.056,
|
| 32 |
+
"step": 586
|
| 33 |
+
}
|
| 34 |
+
],
|
| 35 |
+
"max_steps": 1758,
|
| 36 |
+
"num_train_epochs": 6,
|
| 37 |
+
"total_flos": 6812609916643200.0,
|
| 38 |
+
"trial_name": null,
|
| 39 |
+
"trial_params": null
|
| 40 |
+
}
|
i_context/checkpoint-586/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06df57ca0dfd63b697410147ad9225e8f4f93e6f69a771a4a4be4228b0806d53
|
| 3 |
+
size 4027
|
i_context/checkpoint-586/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
l/checkpoint-586/config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "bert-large-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"id2label": {
|
| 14 |
+
"0": "conflict",
|
| 15 |
+
"1": "inference",
|
| 16 |
+
"2": "no_rel",
|
| 17 |
+
"3": "rephrase"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 4096,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"conflict": 0,
|
| 23 |
+
"inference": 1,
|
| 24 |
+
"no_rel": 2,
|
| 25 |
+
"rephrase": 3
|
| 26 |
+
},
|
| 27 |
+
"layer_norm_eps": 1e-12,
|
| 28 |
+
"max_position_embeddings": 512,
|
| 29 |
+
"model_type": "bert",
|
| 30 |
+
"num_attention_heads": 16,
|
| 31 |
+
"num_hidden_layers": 24,
|
| 32 |
+
"pad_token_id": 0,
|
| 33 |
+
"pooler_fc_size": 768,
|
| 34 |
+
"pooler_num_attention_heads": 12,
|
| 35 |
+
"pooler_num_fc_layers": 3,
|
| 36 |
+
"pooler_size_per_head": 128,
|
| 37 |
+
"pooler_type": "first_token_transform",
|
| 38 |
+
"position_embedding_type": "absolute",
|
| 39 |
+
"problem_type": "single_label_classification",
|
| 40 |
+
"torch_dtype": "float32",
|
| 41 |
+
"transformers_version": "4.31.0",
|
| 42 |
+
"type_vocab_size": 2,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 28996
|
| 45 |
+
}
|
l/checkpoint-586/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fe404824bff6205d153485212c451c040068a75cfd640442d71423911e117e3
|
| 3 |
+
size 2668897797
|
l/checkpoint-586/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e207d853bb70695bb8fc98af2e739a914dd03304bb635ee1a1349832a999f82
|
| 3 |
+
size 1334467889
|
l/checkpoint-586/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
|
| 3 |
+
size 14575
|
l/checkpoint-586/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
|
| 3 |
+
size 627
|
l/checkpoint-586/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
l/checkpoint-586/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
l/checkpoint-586/tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"clean_up_tokenization_spaces": true,
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"model_max_length": 512,
|
| 7 |
+
"pad_token": "[PAD]",
|
| 8 |
+
"sep_token": "[SEP]",
|
| 9 |
+
"strip_accents": null,
|
| 10 |
+
"tokenize_chinese_chars": true,
|
| 11 |
+
"tokenizer_class": "BertTokenizer",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
l/checkpoint-586/trainer_state.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 1.0608546733856201,
|
| 3 |
+
"best_model_checkpoint": "clean_output/bert-large-cased/l/checkpoint-586",
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"global_step": 586,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 1.0,
|
| 12 |
+
"eval_accuracy": 0.5034188034188034,
|
| 13 |
+
"eval_loss": 1.1321873664855957,
|
| 14 |
+
"eval_runtime": 10.4655,
|
| 15 |
+
"eval_samples_per_second": 223.593,
|
| 16 |
+
"eval_steps_per_second": 27.997,
|
| 17 |
+
"step": 293
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.71,
|
| 21 |
+
"learning_rate": 7.680097680097681e-06,
|
| 22 |
+
"loss": 1.2069,
|
| 23 |
+
"step": 500
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 2.0,
|
| 27 |
+
"eval_accuracy": 0.5576923076923077,
|
| 28 |
+
"eval_loss": 1.0608546733856201,
|
| 29 |
+
"eval_runtime": 10.4441,
|
| 30 |
+
"eval_samples_per_second": 224.049,
|
| 31 |
+
"eval_steps_per_second": 28.054,
|
| 32 |
+
"step": 586
|
| 33 |
+
}
|
| 34 |
+
],
|
| 35 |
+
"max_steps": 1758,
|
| 36 |
+
"num_train_epochs": 6,
|
| 37 |
+
"total_flos": 6812609916643200.0,
|
| 38 |
+
"trial_name": null,
|
| 39 |
+
"trial_params": null
|
| 40 |
+
}
|
l/checkpoint-586/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee04b6d4116139a70cf03a37ca11d73bb196bd288da167e5c8cb8cff4b157379
|
| 3 |
+
size 3963
|
l/checkpoint-586/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
l_context/checkpoint-586/config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "bert-large-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 1024,
|
| 13 |
+
"id2label": {
|
| 14 |
+
"0": "conflict",
|
| 15 |
+
"1": "inference",
|
| 16 |
+
"2": "no_rel",
|
| 17 |
+
"3": "rephrase"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 4096,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"conflict": 0,
|
| 23 |
+
"inference": 1,
|
| 24 |
+
"no_rel": 2,
|
| 25 |
+
"rephrase": 3
|
| 26 |
+
},
|
| 27 |
+
"layer_norm_eps": 1e-12,
|
| 28 |
+
"max_position_embeddings": 512,
|
| 29 |
+
"model_type": "bert",
|
| 30 |
+
"num_attention_heads": 16,
|
| 31 |
+
"num_hidden_layers": 24,
|
| 32 |
+
"pad_token_id": 0,
|
| 33 |
+
"pooler_fc_size": 768,
|
| 34 |
+
"pooler_num_attention_heads": 12,
|
| 35 |
+
"pooler_num_fc_layers": 3,
|
| 36 |
+
"pooler_size_per_head": 128,
|
| 37 |
+
"pooler_type": "first_token_transform",
|
| 38 |
+
"position_embedding_type": "absolute",
|
| 39 |
+
"problem_type": "single_label_classification",
|
| 40 |
+
"torch_dtype": "float32",
|
| 41 |
+
"transformers_version": "4.31.0",
|
| 42 |
+
"type_vocab_size": 2,
|
| 43 |
+
"use_cache": true,
|
| 44 |
+
"vocab_size": 28996
|
| 45 |
+
}
|
l_context/checkpoint-586/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e768a441837a5d026b0a1265654666f012f6be5229e031ab813984c514edb280
|
| 3 |
+
size 2668897797
|
l_context/checkpoint-586/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5810cde225e21af1ca4682005c1744989599639719aac93bb563d447d9852036
|
| 3 |
+
size 1334467889
|
l_context/checkpoint-586/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
|
| 3 |
+
size 14575
|
l_context/checkpoint-586/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
|
| 3 |
+
size 627
|
l_context/checkpoint-586/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
l_context/checkpoint-586/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
l_context/checkpoint-586/tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"clean_up_tokenization_spaces": true,
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"model_max_length": 512,
|
| 7 |
+
"pad_token": "[PAD]",
|
| 8 |
+
"sep_token": "[SEP]",
|
| 9 |
+
"strip_accents": null,
|
| 10 |
+
"tokenize_chinese_chars": true,
|
| 11 |
+
"tokenizer_class": "BertTokenizer",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
l_context/checkpoint-586/trainer_state.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 1.117621898651123,
|
| 3 |
+
"best_model_checkpoint": "clean_output/bert-large-cased/l_context/checkpoint-586",
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"global_step": 586,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 1.0,
|
| 12 |
+
"eval_accuracy": 0.49786324786324787,
|
| 13 |
+
"eval_loss": 1.1456269025802612,
|
| 14 |
+
"eval_runtime": 10.4236,
|
| 15 |
+
"eval_samples_per_second": 224.491,
|
| 16 |
+
"eval_steps_per_second": 28.109,
|
| 17 |
+
"step": 293
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.71,
|
| 21 |
+
"learning_rate": 7.680097680097681e-06,
|
| 22 |
+
"loss": 1.2045,
|
| 23 |
+
"step": 500
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 2.0,
|
| 27 |
+
"eval_accuracy": 0.5205128205128206,
|
| 28 |
+
"eval_loss": 1.117621898651123,
|
| 29 |
+
"eval_runtime": 10.4232,
|
| 30 |
+
"eval_samples_per_second": 224.499,
|
| 31 |
+
"eval_steps_per_second": 28.11,
|
| 32 |
+
"step": 586
|
| 33 |
+
}
|
| 34 |
+
],
|
| 35 |
+
"max_steps": 1758,
|
| 36 |
+
"num_train_epochs": 6,
|
| 37 |
+
"total_flos": 6812609916643200.0,
|
| 38 |
+
"trial_name": null,
|
| 39 |
+
"trial_params": null
|
| 40 |
+
}
|
l_context/checkpoint-586/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38d41b07860c38688aa574373cb58e67ae3cae813d8b9e26d4b717ab98c7e820
|
| 3 |
+
size 4027
|
l_context/checkpoint-586/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|