shenkha committed
Commit 3439a48 · verified · 1 parent: c2ed36e

Upload 12 files

added_tokens.json ADDED
@@ -0,0 +1,5 @@
+ {
+ "<CTX>": 32101,
+ "<PS>": 32100,
+ "<SEP>": 32102
+ }
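
Note: the three task tokens are appended directly after the 32,100 entries of the base t5-small vocabulary (32,000 SentencePiece pieces plus 100 <extra_id_*> sentinels). A minimal sketch, assuming the transformers library, of how they would be reproduced; the add order is an assumption inferred from the IDs recorded above.

# Sketch: register the task tokens on top of the base t5-small tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
tokenizer.add_tokens(["<PS>", "<CTX>", "<SEP>"])  # appended after the 32,100 base entries

for tok in ("<PS>", "<CTX>", "<SEP>"):
    print(tok, tokenizer.convert_tokens_to_ids(tok))
# Expected to match added_tokens.json: <PS> 32100, <CTX> 32101, <SEP> 32102
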
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "epoch": 49.99,
+ "eval_bleu": 1.3579,
+ "eval_gen_len": 25.1463,
+ "eval_loss": 3.203944444656372,
+ "eval_runtime": 459.3966,
+ "eval_samples": 5651,
+ "eval_samples_per_second": 12.301,
+ "eval_steps_per_second": 0.309,
+ "train_loss": 3.3598651123046874,
+ "train_runtime": 21589.3175,
+ "train_samples": 27018,
+ "train_samples_per_second": 62.573,
+ "train_steps_per_second": 0.065
+ }
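
A small sketch for inspecting all_results.json once the repo is checked out locally; the "./" path is a placeholder for wherever the files live.

import json

with open("./all_results.json") as f:
    results = json.load(f)

print(f"final eval BLEU: {results['eval_bleu']}")                       # 1.3579
print(f"final eval loss: {results['eval_loss']:.4f}")                   # 3.2039
print(f"training wall time: {results['train_runtime'] / 3600:.1f} h")   # ~6.0 h
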
config.json ADDED
@@ -0,0 +1,60 @@
+ {
+ "_name_or_path": "google-t5/t5-small",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 2048,
+ "d_kv": 64,
+ "d_model": 512,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "relu",
+ "dropout_rate": 0.1,
+ "eos_token_id": 1,
+ "feed_forward_proj": "relu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": false,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 6,
+ "num_heads": 8,
+ "num_layers": 6,
+ "output_past": true,
+ "pad_token_id": 1,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "task_specific_params": {
+ "summarization": {
+ "early_stopping": true,
+ "length_penalty": 2.0,
+ "max_length": 200,
+ "min_length": 30,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "prefix": "summarize: "
+ },
+ "translation_en_to_de": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to German: "
+ },
+ "translation_en_to_fr": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to French: "
+ },
+ "translation_en_to_ro": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to Romanian: "
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.24.0",
+ "use_cache": true,
+ "vocab_size": 32103
+ }
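
A sketch, assuming transformers, that loads this config and checks that vocab_size (32103) equals the base t5-small vocabulary (32100) plus the three tokens in added_tokens.json, i.e. the embedding matrix was resized after the tokens were added. The "./" path is a placeholder for a local checkout of this repo.

from transformers import AutoConfig

config = AutoConfig.from_pretrained("./")  # placeholder path
assert config.model_type == "t5"
assert config.vocab_size == 32100 + 3     # 32103
print(config.num_layers, config.num_heads, config.d_model)  # 6, 8, 512
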
eval_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "epoch": 49.99,
+ "eval_bleu": 1.3579,
+ "eval_gen_len": 25.1463,
+ "eval_loss": 3.203944444656372,
+ "eval_runtime": 459.3966,
+ "eval_samples": 5651,
+ "eval_samples_per_second": 12.301,
+ "eval_steps_per_second": 0.309
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7fc5442a0a390c069a0df3661d0dab77610a9b16002f720ade85e401f4d5a13
+ size 242020886
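
pytorch_model.bin is stored as a Git LFS pointer; the ~242 MB weight file itself is fetched by git-lfs or huggingface_hub. A sketch for verifying a downloaded copy against the oid and size recorded in the pointer (the local path is a placeholder).

import hashlib

expected_oid = "c7fc5442a0a390c069a0df3661d0dab77610a9b16002f720ade85e401f4d5a13"
expected_size = 242020886

h = hashlib.sha256()
size = 0
with open("./pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == expected_size and h.hexdigest() == expected_oid
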
special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
+ {
+ "additional_special_tokens": [
+ "<extra_id_0>",
+ "<extra_id_1>",
+ "<extra_id_2>",
+ "<extra_id_3>",
+ "<extra_id_4>",
+ "<extra_id_5>",
+ "<extra_id_6>",
+ "<extra_id_7>",
+ "<extra_id_8>",
+ "<extra_id_9>",
+ "<extra_id_10>",
+ "<extra_id_11>",
+ "<extra_id_12>",
+ "<extra_id_13>",
+ "<extra_id_14>",
+ "<extra_id_15>",
+ "<extra_id_16>",
+ "<extra_id_17>",
+ "<extra_id_18>",
+ "<extra_id_19>",
+ "<extra_id_20>",
+ "<extra_id_21>",
+ "<extra_id_22>",
+ "<extra_id_23>",
+ "<extra_id_24>",
+ "<extra_id_25>",
+ "<extra_id_26>",
+ "<extra_id_27>",
+ "<extra_id_28>",
+ "<extra_id_29>",
+ "<extra_id_30>",
+ "<extra_id_31>",
+ "<extra_id_32>",
+ "<extra_id_33>",
+ "<extra_id_34>",
+ "<extra_id_35>",
+ "<extra_id_36>",
+ "<extra_id_37>",
+ "<extra_id_38>",
+ "<extra_id_39>",
+ "<extra_id_40>",
+ "<extra_id_41>",
+ "<extra_id_42>",
+ "<extra_id_43>",
+ "<extra_id_44>",
+ "<extra_id_45>",
+ "<extra_id_46>",
+ "<extra_id_47>",
+ "<extra_id_48>",
+ "<extra_id_49>",
+ "<extra_id_50>",
+ "<extra_id_51>",
+ "<extra_id_52>",
+ "<extra_id_53>",
+ "<extra_id_54>",
+ "<extra_id_55>",
+ "<extra_id_56>",
+ "<extra_id_57>",
+ "<extra_id_58>",
+ "<extra_id_59>",
+ "<extra_id_60>",
+ "<extra_id_61>",
+ "<extra_id_62>",
+ "<extra_id_63>",
+ "<extra_id_64>",
+ "<extra_id_65>",
+ "<extra_id_66>",
+ "<extra_id_67>",
+ "<extra_id_68>",
+ "<extra_id_69>",
+ "<extra_id_70>",
+ "<extra_id_71>",
+ "<extra_id_72>",
+ "<extra_id_73>",
+ "<extra_id_74>",
+ "<extra_id_75>",
+ "<extra_id_76>",
+ "<extra_id_77>",
+ "<extra_id_78>",
+ "<extra_id_79>",
+ "<extra_id_80>",
+ "<extra_id_81>",
+ "<extra_id_82>",
+ "<extra_id_83>",
+ "<extra_id_84>",
+ "<extra_id_85>",
+ "<extra_id_86>",
+ "<extra_id_87>",
+ "<extra_id_88>",
+ "<extra_id_89>",
+ "<extra_id_90>",
+ "<extra_id_91>",
+ "<extra_id_92>",
+ "<extra_id_93>",
+ "<extra_id_94>",
+ "<extra_id_95>",
+ "<extra_id_96>",
+ "<extra_id_97>",
+ "<extra_id_98>",
+ "<extra_id_99>"
+ ],
+ "eos_token": "</s>",
+ "pad_token": "</s>",
+ "unk_token": "<unk>"
+ }
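
This file keeps the 100 <extra_id_*> sentinels from t5-small and, unusually, maps pad_token to "</s>" while tokenizer_config.json below records "<pad>". A sketch, assuming transformers, for checking what the loaded tokenizer actually resolves to (the outcome of that conflict can depend on the transformers version); the "./" path is a placeholder.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./")           # placeholder path
print(len(tokenizer.additional_special_tokens))           # expected 100 sentinels
print(tokenizer.eos_token, tokenizer.pad_token, tokenizer.unk_token)
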
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,113 @@
+ {
+ "additional_special_tokens": [
+ "<extra_id_0>",
+ "<extra_id_1>",
+ "<extra_id_2>",
+ "<extra_id_3>",
+ "<extra_id_4>",
+ "<extra_id_5>",
+ "<extra_id_6>",
+ "<extra_id_7>",
+ "<extra_id_8>",
+ "<extra_id_9>",
+ "<extra_id_10>",
+ "<extra_id_11>",
+ "<extra_id_12>",
+ "<extra_id_13>",
+ "<extra_id_14>",
+ "<extra_id_15>",
+ "<extra_id_16>",
+ "<extra_id_17>",
+ "<extra_id_18>",
+ "<extra_id_19>",
+ "<extra_id_20>",
+ "<extra_id_21>",
+ "<extra_id_22>",
+ "<extra_id_23>",
+ "<extra_id_24>",
+ "<extra_id_25>",
+ "<extra_id_26>",
+ "<extra_id_27>",
+ "<extra_id_28>",
+ "<extra_id_29>",
+ "<extra_id_30>",
+ "<extra_id_31>",
+ "<extra_id_32>",
+ "<extra_id_33>",
+ "<extra_id_34>",
+ "<extra_id_35>",
+ "<extra_id_36>",
+ "<extra_id_37>",
+ "<extra_id_38>",
+ "<extra_id_39>",
+ "<extra_id_40>",
+ "<extra_id_41>",
+ "<extra_id_42>",
+ "<extra_id_43>",
+ "<extra_id_44>",
+ "<extra_id_45>",
+ "<extra_id_46>",
+ "<extra_id_47>",
+ "<extra_id_48>",
+ "<extra_id_49>",
+ "<extra_id_50>",
+ "<extra_id_51>",
+ "<extra_id_52>",
+ "<extra_id_53>",
+ "<extra_id_54>",
+ "<extra_id_55>",
+ "<extra_id_56>",
+ "<extra_id_57>",
+ "<extra_id_58>",
+ "<extra_id_59>",
+ "<extra_id_60>",
+ "<extra_id_61>",
+ "<extra_id_62>",
+ "<extra_id_63>",
+ "<extra_id_64>",
+ "<extra_id_65>",
+ "<extra_id_66>",
+ "<extra_id_67>",
+ "<extra_id_68>",
+ "<extra_id_69>",
+ "<extra_id_70>",
+ "<extra_id_71>",
+ "<extra_id_72>",
+ "<extra_id_73>",
+ "<extra_id_74>",
+ "<extra_id_75>",
+ "<extra_id_76>",
+ "<extra_id_77>",
+ "<extra_id_78>",
+ "<extra_id_79>",
+ "<extra_id_80>",
+ "<extra_id_81>",
+ "<extra_id_82>",
+ "<extra_id_83>",
+ "<extra_id_84>",
+ "<extra_id_85>",
+ "<extra_id_86>",
+ "<extra_id_87>",
+ "<extra_id_88>",
+ "<extra_id_89>",
+ "<extra_id_90>",
+ "<extra_id_91>",
+ "<extra_id_92>",
+ "<extra_id_93>",
+ "<extra_id_94>",
+ "<extra_id_95>",
+ "<extra_id_96>",
+ "<extra_id_97>",
+ "<extra_id_98>",
+ "<extra_id_99>"
+ ],
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "</s>",
+ "extra_ids": 100,
+ "model_max_length": 512,
+ "name_or_path": "google-t5/t5-small",
+ "pad_token": "<pad>",
+ "special_tokens_map_file": null,
+ "tokenizer_class": "T5Tokenizer",
+ "unk_token": "<unk>"
+ }
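
With the tokenizer files above plus config.json and pytorch_model.bin, the repo is directly loadable. An end-to-end sketch, assuming transformers and torch; the "<PS> ... <SEP> ... <CTX> ..." input layout is only a guess based on the added tokens, since the prompt format used in training is not recorded in these files, and the model_dir path is a placeholder.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_dir = "./"  # placeholder: local checkout of this repo
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

# Hypothetical input layout using the added task tokens.
text = "<PS> persona sentence <SEP> another persona sentence <CTX> dialogue history"
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
output_ids = model.generate(**inputs, max_length=64, num_beams=4)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
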
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 49.99,
+ "train_loss": 3.3598651123046874,
+ "train_runtime": 21589.3175,
+ "train_samples": 27018,
+ "train_samples_per_second": 62.573,
+ "train_steps_per_second": 0.065
+ }
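
A quick consistency check on these numbers against trainer_state.json below: 1400 optimizer steps over 50 epochs is 28 steps per epoch, which for 27,018 training samples implies an effective batch of roughly 965 samples per step (per-device batch size x gradient accumulation x devices).

train_samples = 27018
max_steps = 1400
epochs = 50

steps_per_epoch = max_steps // epochs               # 28 (matches step 28 at epoch 0.99 below)
effective_batch = train_samples / steps_per_epoch   # ~965 samples per optimizer step
print(steps_per_epoch, round(effective_batch))
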
trainer_state.json ADDED
@@ -0,0 +1,537 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 49.99467140319716,
+ "global_step": 1400,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.99,
+ "eval_bleu": 0.6549,
+ "eval_gen_len": 14.2911,
+ "eval_loss": 3.51861834526062,
+ "eval_runtime": 90.1677,
+ "eval_samples_per_second": 62.672,
+ "eval_steps_per_second": 1.575,
+ "step": 28
+ },
+ {
+ "epoch": 1.99,
+ "eval_bleu": 0.7119,
+ "eval_gen_len": 15.2173,
+ "eval_loss": 3.422013998031616,
+ "eval_runtime": 90.001,
+ "eval_samples_per_second": 62.788,
+ "eval_steps_per_second": 1.578,
+ "step": 56
+ },
+ {
+ "epoch": 2.99,
+ "eval_bleu": 0.7933,
+ "eval_gen_len": 15.77,
+ "eval_loss": 3.3816401958465576,
+ "eval_runtime": 90.2248,
+ "eval_samples_per_second": 62.632,
+ "eval_steps_per_second": 1.574,
+ "step": 84
+ },
+ {
+ "epoch": 3.99,
+ "eval_bleu": 0.828,
+ "eval_gen_len": 15.9515,
+ "eval_loss": 3.357332944869995,
+ "eval_runtime": 90.0431,
+ "eval_samples_per_second": 62.759,
+ "eval_steps_per_second": 1.577,
+ "step": 112
+ },
+ {
+ "epoch": 4.99,
+ "eval_bleu": 0.8564,
+ "eval_gen_len": 15.9135,
+ "eval_loss": 3.3394410610198975,
+ "eval_runtime": 90.2255,
+ "eval_samples_per_second": 62.632,
+ "eval_steps_per_second": 1.574,
+ "step": 140
+ },
+ {
+ "epoch": 5.99,
+ "eval_bleu": 0.9038,
+ "eval_gen_len": 15.9788,
+ "eval_loss": 3.324686050415039,
+ "eval_runtime": 90.177,
+ "eval_samples_per_second": 62.666,
+ "eval_steps_per_second": 1.575,
+ "step": 168
+ },
+ {
+ "epoch": 6.99,
+ "eval_bleu": 0.9239,
+ "eval_gen_len": 16.1274,
+ "eval_loss": 3.312596082687378,
+ "eval_runtime": 90.2242,
+ "eval_samples_per_second": 62.633,
+ "eval_steps_per_second": 1.574,
+ "step": 196
+ },
+ {
+ "epoch": 7.99,
+ "eval_bleu": 0.9535,
+ "eval_gen_len": 16.1568,
+ "eval_loss": 3.3029773235321045,
+ "eval_runtime": 89.9892,
+ "eval_samples_per_second": 62.796,
+ "eval_steps_per_second": 1.578,
+ "step": 224
+ },
+ {
+ "epoch": 8.99,
+ "eval_bleu": 0.9392,
+ "eval_gen_len": 16.2577,
+ "eval_loss": 3.2943568229675293,
+ "eval_runtime": 90.2103,
+ "eval_samples_per_second": 62.643,
+ "eval_steps_per_second": 1.574,
+ "step": 252
+ },
+ {
+ "epoch": 9.99,
+ "eval_bleu": 1.0057,
+ "eval_gen_len": 16.4258,
+ "eval_loss": 3.286745548248291,
+ "eval_runtime": 90.3866,
+ "eval_samples_per_second": 62.52,
+ "eval_steps_per_second": 1.571,
+ "step": 280
+ },
+ {
+ "epoch": 10.99,
+ "eval_bleu": 1.0136,
+ "eval_gen_len": 16.3022,
+ "eval_loss": 3.2792539596557617,
+ "eval_runtime": 90.1642,
+ "eval_samples_per_second": 62.675,
+ "eval_steps_per_second": 1.575,
+ "step": 308
+ },
+ {
+ "epoch": 11.99,
+ "eval_bleu": 1.0151,
+ "eval_gen_len": 16.3868,
+ "eval_loss": 3.273854970932007,
+ "eval_runtime": 90.0094,
+ "eval_samples_per_second": 62.782,
+ "eval_steps_per_second": 1.578,
+ "step": 336
+ },
+ {
+ "epoch": 12.99,
+ "eval_bleu": 1.0137,
+ "eval_gen_len": 16.2493,
+ "eval_loss": 3.2679247856140137,
+ "eval_runtime": 90.2543,
+ "eval_samples_per_second": 62.612,
+ "eval_steps_per_second": 1.573,
+ "step": 364
+ },
+ {
+ "epoch": 13.99,
+ "eval_bleu": 1.0062,
+ "eval_gen_len": 16.3263,
+ "eval_loss": 3.263322353363037,
+ "eval_runtime": 90.0803,
+ "eval_samples_per_second": 62.733,
+ "eval_steps_per_second": 1.576,
+ "step": 392
+ },
+ {
+ "epoch": 14.99,
+ "eval_bleu": 1.0205,
+ "eval_gen_len": 16.1936,
+ "eval_loss": 3.258091449737549,
+ "eval_runtime": 90.2141,
+ "eval_samples_per_second": 62.64,
+ "eval_steps_per_second": 1.574,
+ "step": 420
+ },
+ {
+ "epoch": 15.99,
+ "eval_bleu": 1.0392,
+ "eval_gen_len": 16.2488,
+ "eval_loss": 3.254173755645752,
+ "eval_runtime": 90.1523,
+ "eval_samples_per_second": 62.683,
+ "eval_steps_per_second": 1.575,
+ "step": 448
+ },
+ {
+ "epoch": 16.99,
+ "eval_bleu": 1.0613,
+ "eval_gen_len": 16.1219,
+ "eval_loss": 3.249685764312744,
+ "eval_runtime": 90.3649,
+ "eval_samples_per_second": 62.535,
+ "eval_steps_per_second": 1.571,
+ "step": 476
+ },
+ {
+ "epoch": 17.85,
+ "learning_rate": 3.2142857142857144e-05,
+ "loss": 3.4648,
+ "step": 500
+ },
+ {
+ "epoch": 17.99,
+ "eval_bleu": 1.0755,
+ "eval_gen_len": 16.2776,
+ "eval_loss": 3.2462830543518066,
+ "eval_runtime": 90.126,
+ "eval_samples_per_second": 62.701,
+ "eval_steps_per_second": 1.576,
+ "step": 504
+ },
+ {
+ "epoch": 18.99,
+ "eval_bleu": 1.0772,
+ "eval_gen_len": 16.3072,
+ "eval_loss": 3.2422850131988525,
+ "eval_runtime": 90.07,
+ "eval_samples_per_second": 62.74,
+ "eval_steps_per_second": 1.577,
+ "step": 532
+ },
+ {
+ "epoch": 19.99,
+ "eval_bleu": 1.0893,
+ "eval_gen_len": 16.3113,
+ "eval_loss": 3.2396700382232666,
+ "eval_runtime": 90.1336,
+ "eval_samples_per_second": 62.696,
+ "eval_steps_per_second": 1.575,
+ "step": 560
+ },
+ {
+ "epoch": 20.99,
+ "eval_bleu": 1.0591,
+ "eval_gen_len": 16.2219,
+ "eval_loss": 3.236823797225952,
+ "eval_runtime": 90.1085,
+ "eval_samples_per_second": 62.713,
+ "eval_steps_per_second": 1.576,
+ "step": 588
+ },
+ {
+ "epoch": 21.99,
+ "eval_bleu": 1.0858,
+ "eval_gen_len": 16.3468,
+ "eval_loss": 3.23404598236084,
+ "eval_runtime": 89.9968,
+ "eval_samples_per_second": 62.791,
+ "eval_steps_per_second": 1.578,
+ "step": 616
+ },
+ {
+ "epoch": 22.99,
+ "eval_bleu": 1.1204,
+ "eval_gen_len": 16.284,
+ "eval_loss": 3.2314248085021973,
+ "eval_runtime": 89.8557,
+ "eval_samples_per_second": 62.89,
+ "eval_steps_per_second": 1.58,
+ "step": 644
+ },
+ {
+ "epoch": 23.99,
+ "eval_bleu": 1.1199,
+ "eval_gen_len": 16.3371,
+ "eval_loss": 3.2291789054870605,
+ "eval_runtime": 89.9552,
+ "eval_samples_per_second": 62.82,
+ "eval_steps_per_second": 1.579,
+ "step": 672
+ },
+ {
+ "epoch": 24.99,
+ "eval_bleu": 1.1413,
+ "eval_gen_len": 16.3203,
+ "eval_loss": 3.226853847503662,
+ "eval_runtime": 90.1817,
+ "eval_samples_per_second": 62.662,
+ "eval_steps_per_second": 1.575,
+ "step": 700
+ },
+ {
+ "epoch": 25.99,
+ "eval_bleu": 1.1415,
+ "eval_gen_len": 16.3137,
+ "eval_loss": 3.2246992588043213,
+ "eval_runtime": 90.2118,
+ "eval_samples_per_second": 62.642,
+ "eval_steps_per_second": 1.574,
+ "step": 728
+ },
+ {
+ "epoch": 26.99,
+ "eval_bleu": 1.1522,
+ "eval_gen_len": 16.2957,
+ "eval_loss": 3.2227494716644287,
+ "eval_runtime": 90.192,
+ "eval_samples_per_second": 62.655,
+ "eval_steps_per_second": 1.574,
+ "step": 756
+ },
+ {
+ "epoch": 27.99,
+ "eval_bleu": 1.1315,
+ "eval_gen_len": 16.2869,
+ "eval_loss": 3.2210450172424316,
+ "eval_runtime": 90.0566,
+ "eval_samples_per_second": 62.749,
+ "eval_steps_per_second": 1.577,
+ "step": 784
+ },
+ {
+ "epoch": 28.99,
+ "eval_bleu": 1.1366,
+ "eval_gen_len": 16.2984,
+ "eval_loss": 3.21929669380188,
+ "eval_runtime": 90.1542,
+ "eval_samples_per_second": 62.681,
+ "eval_steps_per_second": 1.575,
+ "step": 812
+ },
+ {
+ "epoch": 29.99,
+ "eval_bleu": 1.1557,
+ "eval_gen_len": 16.3242,
+ "eval_loss": 3.2176640033721924,
+ "eval_runtime": 89.9951,
+ "eval_samples_per_second": 62.792,
+ "eval_steps_per_second": 1.578,
+ "step": 840
+ },
+ {
+ "epoch": 30.99,
+ "eval_bleu": 1.1545,
+ "eval_gen_len": 16.378,
+ "eval_loss": 3.216632604598999,
+ "eval_runtime": 90.1702,
+ "eval_samples_per_second": 62.67,
+ "eval_steps_per_second": 1.575,
+ "step": 868
+ },
+ {
+ "epoch": 31.99,
+ "eval_bleu": 1.1612,
+ "eval_gen_len": 16.3198,
+ "eval_loss": 3.2149343490600586,
+ "eval_runtime": 90.3354,
+ "eval_samples_per_second": 62.556,
+ "eval_steps_per_second": 1.572,
+ "step": 896
+ },
+ {
+ "epoch": 32.99,
+ "eval_bleu": 1.1612,
+ "eval_gen_len": 16.3831,
+ "eval_loss": 3.2138538360595703,
+ "eval_runtime": 89.9561,
+ "eval_samples_per_second": 62.82,
+ "eval_steps_per_second": 1.579,
+ "step": 924
+ },
+ {
+ "epoch": 33.99,
+ "eval_bleu": 1.1849,
+ "eval_gen_len": 16.3297,
+ "eval_loss": 3.2125675678253174,
+ "eval_runtime": 90.1068,
+ "eval_samples_per_second": 62.714,
+ "eval_steps_per_second": 1.576,
+ "step": 952
+ },
+ {
+ "epoch": 34.99,
+ "eval_bleu": 1.1838,
+ "eval_gen_len": 16.3698,
+ "eval_loss": 3.211477279663086,
+ "eval_runtime": 90.1237,
+ "eval_samples_per_second": 62.703,
+ "eval_steps_per_second": 1.576,
+ "step": 980
+ },
+ {
+ "epoch": 35.71,
+ "learning_rate": 1.4285714285714285e-05,
+ "loss": 3.3175,
+ "step": 1000
+ },
+ {
+ "epoch": 35.99,
+ "eval_bleu": 1.1849,
+ "eval_gen_len": 16.3953,
+ "eval_loss": 3.2102482318878174,
+ "eval_runtime": 89.7847,
+ "eval_samples_per_second": 62.939,
+ "eval_steps_per_second": 1.582,
+ "step": 1008
+ },
+ {
+ "epoch": 36.99,
+ "eval_bleu": 1.2054,
+ "eval_gen_len": 16.4488,
+ "eval_loss": 3.2095632553100586,
+ "eval_runtime": 90.0872,
+ "eval_samples_per_second": 62.728,
+ "eval_steps_per_second": 1.576,
+ "step": 1036
+ },
+ {
+ "epoch": 37.99,
+ "eval_bleu": 1.1967,
+ "eval_gen_len": 16.4178,
+ "eval_loss": 3.208686113357544,
+ "eval_runtime": 90.1581,
+ "eval_samples_per_second": 62.679,
+ "eval_steps_per_second": 1.575,
+ "step": 1064
+ },
+ {
+ "epoch": 38.99,
+ "eval_bleu": 1.1766,
+ "eval_gen_len": 16.3684,
+ "eval_loss": 3.207756996154785,
+ "eval_runtime": 89.9296,
+ "eval_samples_per_second": 62.838,
+ "eval_steps_per_second": 1.579,
+ "step": 1092
+ },
+ {
+ "epoch": 39.99,
+ "eval_bleu": 1.1795,
+ "eval_gen_len": 16.4084,
+ "eval_loss": 3.2069926261901855,
+ "eval_runtime": 90.2544,
+ "eval_samples_per_second": 62.612,
+ "eval_steps_per_second": 1.573,
+ "step": 1120
+ },
+ {
+ "epoch": 40.99,
+ "eval_bleu": 1.1961,
+ "eval_gen_len": 16.4006,
+ "eval_loss": 3.206512928009033,
+ "eval_runtime": 90.0766,
+ "eval_samples_per_second": 62.735,
+ "eval_steps_per_second": 1.576,
+ "step": 1148
+ },
+ {
+ "epoch": 41.99,
+ "eval_bleu": 1.1815,
+ "eval_gen_len": 16.421,
+ "eval_loss": 3.205848455429077,
+ "eval_runtime": 90.3074,
+ "eval_samples_per_second": 62.575,
+ "eval_steps_per_second": 1.572,
+ "step": 1176
+ },
+ {
+ "epoch": 42.99,
+ "eval_bleu": 1.1988,
+ "eval_gen_len": 16.4063,
+ "eval_loss": 3.2054076194763184,
+ "eval_runtime": 90.2177,
+ "eval_samples_per_second": 62.637,
+ "eval_steps_per_second": 1.574,
+ "step": 1204
+ },
+ {
+ "epoch": 43.99,
+ "eval_bleu": 1.1869,
+ "eval_gen_len": 16.3886,
+ "eval_loss": 3.2050981521606445,
+ "eval_runtime": 90.1457,
+ "eval_samples_per_second": 62.687,
+ "eval_steps_per_second": 1.575,
+ "step": 1232
+ },
+ {
+ "epoch": 44.99,
+ "eval_bleu": 1.1861,
+ "eval_gen_len": 16.4121,
+ "eval_loss": 3.204746723175049,
+ "eval_runtime": 90.0329,
+ "eval_samples_per_second": 62.766,
+ "eval_steps_per_second": 1.577,
+ "step": 1260
+ },
+ {
+ "epoch": 45.99,
+ "eval_bleu": 1.1751,
+ "eval_gen_len": 16.3983,
+ "eval_loss": 3.204568386077881,
+ "eval_runtime": 90.1067,
+ "eval_samples_per_second": 62.715,
+ "eval_steps_per_second": 1.576,
+ "step": 1288
+ },
+ {
+ "epoch": 46.99,
+ "eval_bleu": 1.181,
+ "eval_gen_len": 16.4228,
+ "eval_loss": 3.2042646408081055,
+ "eval_runtime": 90.0428,
+ "eval_samples_per_second": 62.759,
+ "eval_steps_per_second": 1.577,
+ "step": 1316
+ },
+ {
+ "epoch": 47.99,
+ "eval_bleu": 1.1787,
+ "eval_gen_len": 16.4403,
+ "eval_loss": 3.204113245010376,
+ "eval_runtime": 90.1802,
+ "eval_samples_per_second": 62.663,
+ "eval_steps_per_second": 1.575,
+ "step": 1344
+ },
+ {
+ "epoch": 48.99,
+ "eval_bleu": 1.1932,
+ "eval_gen_len": 16.4201,
+ "eval_loss": 3.203984260559082,
+ "eval_runtime": 90.324,
+ "eval_samples_per_second": 62.564,
+ "eval_steps_per_second": 1.572,
+ "step": 1372
+ },
+ {
+ "epoch": 49.99,
+ "eval_bleu": 1.1935,
+ "eval_gen_len": 16.4247,
+ "eval_loss": 3.203944444656372,
+ "eval_runtime": 90.1016,
+ "eval_samples_per_second": 62.718,
+ "eval_steps_per_second": 1.576,
+ "step": 1400
+ },
+ {
+ "epoch": 49.99,
+ "step": 1400,
+ "total_flos": 1.2293823395620454e+17,
+ "train_loss": 3.3598651123046874,
+ "train_runtime": 21589.3175,
+ "train_samples_per_second": 62.573,
+ "train_steps_per_second": 0.065
+ }
+ ],
+ "max_steps": 1400,
+ "num_train_epochs": 50,
+ "total_flos": 1.2293823395620454e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
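
A sketch for visualising the per-epoch evaluation curve stored in log_history (matplotlib is assumed to be installed; the "./" path is a placeholder for a local checkout).

import json
import matplotlib.pyplot as plt

with open("./trainer_state.json") as f:
    state = json.load(f)

# Keep only the entries that carry evaluation metrics (the lr/loss log entries do not).
evals = [e for e in state["log_history"] if "eval_bleu" in e]
epochs = [e["epoch"] for e in evals]

fig, ax1 = plt.subplots()
ax1.plot(epochs, [e["eval_loss"] for e in evals], label="eval_loss")
ax1.set_xlabel("epoch")
ax1.set_ylabel("eval_loss")
ax2 = ax1.twinx()
ax2.plot(epochs, [e["eval_bleu"] for e in evals], color="tab:orange", label="eval_bleu")
ax2.set_ylabel("eval_bleu")
plt.show()
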
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b4ceccfaa19402d803b45376b86a84465b574c624818ad1c60d125bbb43265c0
+ size 3960
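
training_args.bin is a pickled TrainingArguments object saved by the Hugging Face Trainer, also stored via Git LFS. A sketch for inspecting it; unpickling requires a compatible transformers install, and on recent torch versions weights_only=False is needed because the file is a pickle rather than a tensor checkpoint (on older torch, drop that argument). The path is a placeholder.

import torch

args = torch.load("./training_args.bin", weights_only=False)
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)
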