tessaig committed
Commit 0cef54c · 1 Parent(s): 1b14406

checkpoint added

Files changed (44)
  1. i/checkpoint-586/config.json +45 -0
  2. i/checkpoint-586/optimizer.pt +3 -0
  3. i/checkpoint-586/pytorch_model.bin +3 -0
  4. i/checkpoint-586/rng_state.pth +3 -0
  5. i/checkpoint-586/scheduler.pt +3 -0
  6. i/checkpoint-586/special_tokens_map.json +7 -0
  7. i/checkpoint-586/tokenizer.json +0 -0
  8. i/checkpoint-586/tokenizer_config.json +13 -0
  9. i/checkpoint-586/trainer_state.json +40 -0
  10. i/checkpoint-586/training_args.bin +3 -0
  11. i/checkpoint-586/vocab.txt +0 -0
  12. i_context/checkpoint-586/config.json +45 -0
  13. i_context/checkpoint-586/optimizer.pt +3 -0
  14. i_context/checkpoint-586/pytorch_model.bin +3 -0
  15. i_context/checkpoint-586/rng_state.pth +3 -0
  16. i_context/checkpoint-586/scheduler.pt +3 -0
  17. i_context/checkpoint-586/special_tokens_map.json +7 -0
  18. i_context/checkpoint-586/tokenizer.json +0 -0
  19. i_context/checkpoint-586/tokenizer_config.json +13 -0
  20. i_context/checkpoint-586/trainer_state.json +40 -0
  21. i_context/checkpoint-586/training_args.bin +3 -0
  22. i_context/checkpoint-586/vocab.txt +0 -0
  23. l/checkpoint-586/config.json +45 -0
  24. l/checkpoint-586/optimizer.pt +3 -0
  25. l/checkpoint-586/pytorch_model.bin +3 -0
  26. l/checkpoint-586/rng_state.pth +3 -0
  27. l/checkpoint-586/scheduler.pt +3 -0
  28. l/checkpoint-586/special_tokens_map.json +7 -0
  29. l/checkpoint-586/tokenizer.json +0 -0
  30. l/checkpoint-586/tokenizer_config.json +13 -0
  31. l/checkpoint-586/trainer_state.json +40 -0
  32. l/checkpoint-586/training_args.bin +3 -0
  33. l/checkpoint-586/vocab.txt +0 -0
  34. l_context/checkpoint-586/config.json +45 -0
  35. l_context/checkpoint-586/optimizer.pt +3 -0
  36. l_context/checkpoint-586/pytorch_model.bin +3 -0
  37. l_context/checkpoint-586/rng_state.pth +3 -0
  38. l_context/checkpoint-586/scheduler.pt +3 -0
  39. l_context/checkpoint-586/special_tokens_map.json +7 -0
  40. l_context/checkpoint-586/tokenizer.json +0 -0
  41. l_context/checkpoint-586/tokenizer_config.json +13 -0
  42. l_context/checkpoint-586/trainer_state.json +40 -0
  43. l_context/checkpoint-586/training_args.bin +3 -0
  44. l_context/checkpoint-586/vocab.txt +0 -0
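
This commit adds four parallel Hugging Face Trainer checkpoints at global step 586 (i, i_context, l, l_context), each a complete bert-large-cased sequence-classification checkpoint: model weights, optimizer and scheduler state, RNG state, tokenizer files, and the trainer log. As a minimal sketch of pulling a single variant from the Hub (the repository id is not shown on this page, so tessaig/REPO_NAME below is a placeholder):

from huggingface_hub import snapshot_download

# Hypothetical repo id; substitute the actual repository this commit belongs to.
local_dir = snapshot_download(
    repo_id="tessaig/REPO_NAME",
    allow_patterns=["i/checkpoint-586/*"],  # fetch just one of the four variants
)
print(local_dir)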
i/checkpoint-586/config.json ADDED
@@ -0,0 +1,45 @@
+ {
+   "_name_or_path": "bert-large-cased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "id2label": {
+     "0": "conflict",
+     "1": "inference",
+     "2": "no_rel",
+     "3": "rephrase"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "label2id": {
+     "conflict": 0,
+     "inference": 1,
+     "no_rel": 2,
+     "rephrase": 3
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 28996
+ }
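
The config above describes bert-large-cased (24 layers, hidden size 1024, 16 heads) fine-tuned as a four-way single-label classifier over conflict / inference / no_rel / rephrase. A minimal loading sketch, assuming the checkpoint directory is available locally (the sentence-pair input is an assumption about how the classifier was trained):

from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "i/checkpoint-586"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt)

# Sentence-pair input is assumed here; adjust to the actual task format.
inputs = tokenizer("First utterance.", "Second utterance.", return_tensors="pt")
pred = model(**inputs).logits.argmax(dim=-1).item()
print(model.config.id2label[pred])  # conflict, inference, no_rel, or rephrase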
i/checkpoint-586/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c071b119f8a8d2def9bc6b38b6705d36d793826fa884507ce442aa400681bd6f
+ size 2668897797
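
The .pt, .bin, and .pth entries in this commit are Git LFS pointer files: the repository stores only the spec version, a sha256 object id, and the byte size, while the ~2.7 GB optimizer state itself lives in LFS storage. A short sketch for checking a downloaded object against its pointer:

import hashlib
import os

path = "i/checkpoint-586/optimizer.pt"
expected_oid = "c071b119f8a8d2def9bc6b38b6705d36d793826fa884507ce442aa400681bd6f"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == 2668897797, "size mismatch with LFS pointer"
assert sha.hexdigest() == expected_oid, "sha256 mismatch with LFS pointer"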
i/checkpoint-586/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53075d76ecb13d2ffbd1bc6f724603c0ce9547fc44e33ce350a4d7f4a4334f22
+ size 1334467889
i/checkpoint-586/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
+ size 14575
i/checkpoint-586/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
+ size 627
i/checkpoint-586/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
i/checkpoint-586/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
i/checkpoint-586/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
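
tokenizer_config.json pins a cased BertTokenizer (do_lower_case: false) with the model's 512-position limit as model_max_length. A quick usage sketch, again assuming a local copy of the checkpoint:

from transformers import BertTokenizer

tok = BertTokenizer.from_pretrained("i/checkpoint-586")
enc = tok("Cased Input Text", truncation=True, max_length=512)
print(tok.convert_ids_to_tokens(enc["input_ids"]))  # casing preserved, [CLS]/[SEP] added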
i/checkpoint-586/trainer_state.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "best_metric": 1.0550591945648193,
+   "best_model_checkpoint": "clean_output/bert-large-cased/i/checkpoint-586",
+   "epoch": 2.0,
+   "global_step": 586,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.5,
+       "eval_loss": 1.1290996074676514,
+       "eval_runtime": 10.4475,
+       "eval_samples_per_second": 223.978,
+       "eval_steps_per_second": 28.045,
+       "step": 293
+     },
+     {
+       "epoch": 1.71,
+       "learning_rate": 7.680097680097681e-06,
+       "loss": 1.174,
+       "step": 500
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.5547008547008547,
+       "eval_loss": 1.0550591945648193,
+       "eval_runtime": 10.4781,
+       "eval_samples_per_second": 223.323,
+       "eval_steps_per_second": 27.963,
+       "step": 586
+     }
+   ],
+   "max_steps": 1758,
+   "num_train_epochs": 6,
+   "total_flos": 6812609916643200.0,
+   "trial_name": null,
+   "trial_params": null
+ }
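
trainer_state.json captures the run mid-flight: 2 of 6 planned epochs (step 586 of 1758), eval accuracy improving from 0.500 to 0.555, and this checkpoint recorded as the best so far by eval_loss (1.055). A sketch for reading the log, plus the standard resume call (the Trainer setup itself is not part of this commit, so the last line is illustrative):

import json

with open("i/checkpoint-586/trainer_state.json") as f:
    state = json.load(f)

# Print one row per evaluation pass.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(entry["epoch"], entry["eval_accuracy"], entry["eval_loss"])

# With the original Trainer reconstructed, training resumes from step 586:
# trainer.train(resume_from_checkpoint="i/checkpoint-586")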
i/checkpoint-586/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06b07181fa39b72a45b37e999ef96b344aa5d73c1378eb6fbf7bcac9cb1ec357
+ size 3963
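
training_args.bin is the TrainingArguments object serialized with torch.save. It can be inspected directly; note that this unpickles arbitrary Python, so only load checkpoints you trust (and on recent PyTorch, where safe loading is the default, weights_only=False is required):

import torch

args = torch.load("i/checkpoint-586/training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.output_dir)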
i/checkpoint-586/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
i_context/checkpoint-586/config.json ADDED
@@ -0,0 +1,45 @@
+ {
+   "_name_or_path": "bert-large-cased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "id2label": {
+     "0": "conflict",
+     "1": "inference",
+     "2": "no_rel",
+     "3": "rephrase"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "label2id": {
+     "conflict": 0,
+     "inference": 1,
+     "no_rel": 2,
+     "rephrase": 3
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 28996
+ }
i_context/checkpoint-586/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:559ef893eee26ebfaf78999f62491999f95298cc468c8a8d40cc967d5a027384
+ size 2668897797
i_context/checkpoint-586/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d5c5a5fd576f61fff46221d942d38c6096f39c2362998e3e182fb3d5f14b6c7
+ size 1334467889
i_context/checkpoint-586/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
+ size 14575
i_context/checkpoint-586/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
+ size 627
i_context/checkpoint-586/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
i_context/checkpoint-586/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
i_context/checkpoint-586/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
i_context/checkpoint-586/trainer_state.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "best_metric": 1.0713884830474854,
+   "best_model_checkpoint": "clean_output/bert-large-cased/i_context/checkpoint-586",
+   "epoch": 2.0,
+   "global_step": 586,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.505982905982906,
+       "eval_loss": 1.1140286922454834,
+       "eval_runtime": 10.4473,
+       "eval_samples_per_second": 223.981,
+       "eval_steps_per_second": 28.045,
+       "step": 293
+     },
+     {
+       "epoch": 1.71,
+       "learning_rate": 7.680097680097681e-06,
+       "loss": 1.1604,
+       "step": 500
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.5525641025641026,
+       "eval_loss": 1.0713884830474854,
+       "eval_runtime": 10.4432,
+       "eval_samples_per_second": 224.069,
+       "eval_steps_per_second": 28.056,
+       "step": 586
+     }
+   ],
+   "max_steps": 1758,
+   "num_train_epochs": 6,
+   "total_flos": 6812609916643200.0,
+   "trial_name": null,
+   "trial_params": null
+ }
i_context/checkpoint-586/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06df57ca0dfd63b697410147ad9225e8f4f93e6f69a771a4a4be4228b0806d53
+ size 4027
i_context/checkpoint-586/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
l/checkpoint-586/config.json ADDED
@@ -0,0 +1,45 @@
+ {
+   "_name_or_path": "bert-large-cased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "id2label": {
+     "0": "conflict",
+     "1": "inference",
+     "2": "no_rel",
+     "3": "rephrase"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "label2id": {
+     "conflict": 0,
+     "inference": 1,
+     "no_rel": 2,
+     "rephrase": 3
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 28996
+ }
l/checkpoint-586/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2fe404824bff6205d153485212c451c040068a75cfd640442d71423911e117e3
+ size 2668897797
l/checkpoint-586/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e207d853bb70695bb8fc98af2e739a914dd03304bb635ee1a1349832a999f82
+ size 1334467889
l/checkpoint-586/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
+ size 14575
l/checkpoint-586/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
+ size 627
l/checkpoint-586/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
l/checkpoint-586/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
l/checkpoint-586/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
l/checkpoint-586/trainer_state.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "best_metric": 1.0608546733856201,
+   "best_model_checkpoint": "clean_output/bert-large-cased/l/checkpoint-586",
+   "epoch": 2.0,
+   "global_step": 586,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.5034188034188034,
+       "eval_loss": 1.1321873664855957,
+       "eval_runtime": 10.4655,
+       "eval_samples_per_second": 223.593,
+       "eval_steps_per_second": 27.997,
+       "step": 293
+     },
+     {
+       "epoch": 1.71,
+       "learning_rate": 7.680097680097681e-06,
+       "loss": 1.2069,
+       "step": 500
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.5576923076923077,
+       "eval_loss": 1.0608546733856201,
+       "eval_runtime": 10.4441,
+       "eval_samples_per_second": 224.049,
+       "eval_steps_per_second": 28.054,
+       "step": 586
+     }
+   ],
+   "max_steps": 1758,
+   "num_train_epochs": 6,
+   "total_flos": 6812609916643200.0,
+   "trial_name": null,
+   "trial_params": null
+ }
l/checkpoint-586/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee04b6d4116139a70cf03a37ca11d73bb196bd288da167e5c8cb8cff4b157379
+ size 3963
l/checkpoint-586/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
l_context/checkpoint-586/config.json ADDED
@@ -0,0 +1,45 @@
+ {
+   "_name_or_path": "bert-large-cased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "id2label": {
+     "0": "conflict",
+     "1": "inference",
+     "2": "no_rel",
+     "3": "rephrase"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "label2id": {
+     "conflict": 0,
+     "inference": 1,
+     "no_rel": 2,
+     "rephrase": 3
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 28996
+ }
l_context/checkpoint-586/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e768a441837a5d026b0a1265654666f012f6be5229e031ab813984c514edb280
+ size 2668897797
l_context/checkpoint-586/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5810cde225e21af1ca4682005c1744989599639719aac93bb563d447d9852036
+ size 1334467889
l_context/checkpoint-586/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02fb5630d7730234c36bdb4fb7e880dcb58367c0f810d19ba2082d8485b09c7c
+ size 14575
l_context/checkpoint-586/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99ebd2ec61759520f0786a379b7aa2b000e3a0998709cfd6554185df948834c
+ size 627
l_context/checkpoint-586/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
l_context/checkpoint-586/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
l_context/checkpoint-586/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
l_context/checkpoint-586/trainer_state.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "best_metric": 1.117621898651123,
+   "best_model_checkpoint": "clean_output/bert-large-cased/l_context/checkpoint-586",
+   "epoch": 2.0,
+   "global_step": 586,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.49786324786324787,
+       "eval_loss": 1.1456269025802612,
+       "eval_runtime": 10.4236,
+       "eval_samples_per_second": 224.491,
+       "eval_steps_per_second": 28.109,
+       "step": 293
+     },
+     {
+       "epoch": 1.71,
+       "learning_rate": 7.680097680097681e-06,
+       "loss": 1.2045,
+       "step": 500
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.5205128205128206,
+       "eval_loss": 1.117621898651123,
+       "eval_runtime": 10.4232,
+       "eval_samples_per_second": 224.499,
+       "eval_steps_per_second": 28.11,
+       "step": 586
+     }
+   ],
+   "max_steps": 1758,
+   "num_train_epochs": 6,
+   "total_flos": 6812609916643200.0,
+   "trial_name": null,
+   "trial_params": null
+ }
l_context/checkpoint-586/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38d41b07860c38688aa574373cb58e67ae3cae813d8b9e26d4b717ab98c7e820
+ size 4027
l_context/checkpoint-586/vocab.txt ADDED
The diff for this file is too large to render. See raw diff