psp_dada commited on
Commit
39bbbe8
·
1 Parent(s): 26b6f15
adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data2/zhuotaotian/psp/llm/utils/models/repo/llava-v1.6-vicuna-13b-hf",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 256,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 128,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": "^(?!.*vision_tower).*(?:k_proj|gate_proj|q_proj|down_proj|o_proj|up_proj|v_proj).*",
23
+ "task_type": "CAUSAL_LM",
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdeaa2076f0e501a9d5035fc6ccfd231975cccc58837744966034b8bdbcf04e1
3
+ size 1001475344
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 32000
3
+ }
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9965714285714286,
3
+ "total_flos": 2.557771887987917e+18,
4
+ "train_loss": 0.6735316744638146,
5
+ "train_runtime": 12905.5168,
6
+ "train_samples_per_second": 0.542,
7
+ "train_steps_per_second": 0.008
8
+ }
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
3
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
processor_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "image_token": "<image>",
3
+ "patch_size": 14,
4
+ "processor_class": "LlavaNextProcessor",
5
+ "vision_feature_select_strategy": "default"
6
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "chat_template": "{% set system_message = 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\\'s questions.' %}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'USER: ' + content + ' ASSISTANT:' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
41
+ "clean_up_tokenization_spaces": false,
42
+ "eos_token": "</s>",
43
+ "extra_special_tokens": {
44
+ "image_token": "<image>"
45
+ },
46
+ "image_token": "<image>",
47
+ "legacy": false,
48
+ "model_max_length": 4096,
49
+ "pad_token": "<unk>",
50
+ "padding_side": "right",
51
+ "processor_class": "LlavaNextProcessor",
52
+ "sp_model_kwargs": {},
53
+ "spaces_between_special_tokens": false,
54
+ "split_special_tokens": false,
55
+ "tokenizer_class": "LlamaTokenizer",
56
+ "unk_token": "<unk>",
57
+ "use_default_system_prompt": false
58
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9965714285714286,
3
+ "total_flos": 2.557771887987917e+18,
4
+ "train_loss": 0.6735316744638146,
5
+ "train_runtime": 12905.5168,
6
+ "train_samples_per_second": 0.542,
7
+ "train_steps_per_second": 0.008
8
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 109, "loss": 0.6931, "accuracy": 0.0, "lr": 2.9993770144857767e-06, "epoch": 0.009142857142857144, "percentage": 0.92, "elapsed_time": "0:01:56", "remaining_time": "3:29:52"}
2
+ {"current_steps": 2, "total_steps": 109, "loss": 0.6943, "accuracy": 0.4375, "lr": 2.997508575424375e-06, "epoch": 0.018285714285714287, "percentage": 1.83, "elapsed_time": "0:03:57", "remaining_time": "3:31:41"}
3
+ {"current_steps": 3, "total_steps": 109, "loss": 0.6956, "accuracy": 0.390625, "lr": 2.9943962348297537e-06, "epoch": 0.027428571428571427, "percentage": 2.75, "elapsed_time": "0:05:56", "remaining_time": "3:29:40"}
4
+ {"current_steps": 4, "total_steps": 109, "loss": 0.6923, "accuracy": 0.515625, "lr": 2.9900425779593876e-06, "epoch": 0.036571428571428574, "percentage": 3.67, "elapsed_time": "0:07:51", "remaining_time": "3:26:14"}
5
+ {"current_steps": 5, "total_steps": 109, "loss": 0.6914, "accuracy": 0.546875, "lr": 2.9844512211668286e-06, "epoch": 0.045714285714285714, "percentage": 4.59, "elapsed_time": "0:09:50", "remaining_time": "3:24:45"}
6
+ {"current_steps": 6, "total_steps": 109, "loss": 0.6921, "accuracy": 0.53125, "lr": 2.977626808897792e-06, "epoch": 0.054857142857142854, "percentage": 5.5, "elapsed_time": "0:11:51", "remaining_time": "3:23:33"}
7
+ {"current_steps": 7, "total_steps": 109, "loss": 0.6945, "accuracy": 0.484375, "lr": 2.9695750098322613e-06, "epoch": 0.064, "percentage": 6.42, "elapsed_time": "0:13:48", "remaining_time": "3:21:13"}
8
+ {"current_steps": 8, "total_steps": 109, "loss": 0.6917, "accuracy": 0.578125, "lr": 2.9603025121758102e-06, "epoch": 0.07314285714285715, "percentage": 7.34, "elapsed_time": "0:15:42", "remaining_time": "3:18:13"}
9
+ {"current_steps": 9, "total_steps": 109, "loss": 0.6922, "accuracy": 0.578125, "lr": 2.9498170181040663e-06, "epoch": 0.08228571428571428, "percentage": 8.26, "elapsed_time": "0:17:37", "remaining_time": "3:15:44"}
10
+ {"current_steps": 10, "total_steps": 109, "loss": 0.6949, "accuracy": 0.4375, "lr": 2.938127237364918e-06, "epoch": 0.09142857142857143, "percentage": 9.17, "elapsed_time": "0:19:33", "remaining_time": "3:13:40"}
11
+ {"current_steps": 11, "total_steps": 109, "loss": 0.6941, "accuracy": 0.484375, "lr": 2.925242880043786e-06, "epoch": 0.10057142857142858, "percentage": 10.09, "elapsed_time": "0:21:29", "remaining_time": "3:11:30"}
12
+ {"current_steps": 12, "total_steps": 109, "loss": 0.6907, "accuracy": 0.59375, "lr": 2.911174648497964e-06, "epoch": 0.10971428571428571, "percentage": 11.01, "elapsed_time": "0:23:24", "remaining_time": "3:09:10"}
13
+ {"current_steps": 13, "total_steps": 109, "loss": 0.6886, "accuracy": 0.5625, "lr": 2.895934228466738e-06, "epoch": 0.11885714285714286, "percentage": 11.93, "elapsed_time": "0:25:26", "remaining_time": "3:07:51"}
14
+ {"current_steps": 14, "total_steps": 109, "loss": 0.6917, "accuracy": 0.515625, "lr": 2.879534279364654e-06, "epoch": 0.128, "percentage": 12.84, "elapsed_time": "0:27:24", "remaining_time": "3:06:00"}
15
+ {"current_steps": 15, "total_steps": 109, "loss": 0.6907, "accuracy": 0.578125, "lr": 2.8619884237660125e-06, "epoch": 0.13714285714285715, "percentage": 13.76, "elapsed_time": "0:29:22", "remaining_time": "3:04:03"}
16
+ {"current_steps": 16, "total_steps": 109, "loss": 0.6923, "accuracy": 0.578125, "lr": 2.843311236089309e-06, "epoch": 0.1462857142857143, "percentage": 14.68, "elapsed_time": "0:31:23", "remaining_time": "3:02:26"}
17
+ {"current_steps": 17, "total_steps": 109, "loss": 0.6914, "accuracy": 0.59375, "lr": 2.8235182304910364e-06, "epoch": 0.15542857142857142, "percentage": 15.6, "elapsed_time": "0:33:22", "remaining_time": "3:00:39"}
18
+ {"current_steps": 18, "total_steps": 109, "loss": 0.6891, "accuracy": 0.640625, "lr": 2.8026258479788888e-06, "epoch": 0.16457142857142856, "percentage": 16.51, "elapsed_time": "0:35:21", "remaining_time": "2:58:43"}
19
+ {"current_steps": 19, "total_steps": 109, "loss": 0.6887, "accuracy": 0.5625, "lr": 2.780651442755083e-06, "epoch": 0.1737142857142857, "percentage": 17.43, "elapsed_time": "0:37:19", "remaining_time": "2:56:48"}
20
+ {"current_steps": 20, "total_steps": 109, "loss": 0.6886, "accuracy": 0.609375, "lr": 2.7576132678011365e-06, "epoch": 0.18285714285714286, "percentage": 18.35, "elapsed_time": "0:39:16", "remaining_time": "2:54:47"}
21
+ {"current_steps": 21, "total_steps": 109, "loss": 0.6845, "accuracy": 0.6875, "lr": 2.7335304597160764e-06, "epoch": 0.192, "percentage": 19.27, "elapsed_time": "0:41:18", "remaining_time": "2:53:07"}
22
+ {"current_steps": 22, "total_steps": 109, "loss": 0.688, "accuracy": 0.671875, "lr": 2.7084230228206746e-06, "epoch": 0.20114285714285715, "percentage": 20.18, "elapsed_time": "0:43:20", "remaining_time": "2:51:24"}
23
+ {"current_steps": 23, "total_steps": 109, "loss": 0.6897, "accuracy": 0.625, "lr": 2.6823118125409112e-06, "epoch": 0.2102857142857143, "percentage": 21.1, "elapsed_time": "0:45:16", "remaining_time": "2:49:15"}
24
+ {"current_steps": 24, "total_steps": 109, "loss": 0.6861, "accuracy": 0.671875, "lr": 2.6552185180844704e-06, "epoch": 0.21942857142857142, "percentage": 22.02, "elapsed_time": "0:47:11", "remaining_time": "2:47:07"}
25
+ {"current_steps": 25, "total_steps": 109, "loss": 0.6831, "accuracy": 0.71875, "lr": 2.6271656444246578e-06, "epoch": 0.22857142857142856, "percentage": 22.94, "elapsed_time": "0:49:13", "remaining_time": "2:45:23"}
26
+ {"current_steps": 26, "total_steps": 109, "loss": 0.6859, "accuracy": 0.5625, "lr": 2.598176493606703e-06, "epoch": 0.2377142857142857, "percentage": 23.85, "elapsed_time": "0:51:15", "remaining_time": "2:43:37"}
27
+ {"current_steps": 27, "total_steps": 109, "loss": 0.6857, "accuracy": 0.703125, "lr": 2.568275145391978e-06, "epoch": 0.24685714285714286, "percentage": 24.77, "elapsed_time": "0:53:11", "remaining_time": "2:41:32"}
28
+ {"current_steps": 28, "total_steps": 109, "loss": 0.6853, "accuracy": 0.703125, "lr": 2.5374864372562077e-06, "epoch": 0.256, "percentage": 25.69, "elapsed_time": "0:55:17", "remaining_time": "2:39:56"}
29
+ {"current_steps": 29, "total_steps": 109, "loss": 0.6838, "accuracy": 0.625, "lr": 2.505835943758286e-06, "epoch": 0.2651428571428571, "percentage": 26.61, "elapsed_time": "0:57:15", "remaining_time": "2:37:56"}
30
+ {"current_steps": 30, "total_steps": 109, "loss": 0.683, "accuracy": 0.6875, "lr": 2.4733499552968357e-06, "epoch": 0.2742857142857143, "percentage": 27.52, "elapsed_time": "0:59:12", "remaining_time": "2:35:53"}
31
+ {"current_steps": 31, "total_steps": 109, "loss": 0.6861, "accuracy": 0.65625, "lr": 2.440055456272159e-06, "epoch": 0.2834285714285714, "percentage": 28.44, "elapsed_time": "1:01:08", "remaining_time": "2:33:49"}
32
+ {"current_steps": 32, "total_steps": 109, "loss": 0.6824, "accuracy": 0.75, "lr": 2.4059801026717166e-06, "epoch": 0.2925714285714286, "percentage": 29.36, "elapsed_time": "1:03:04", "remaining_time": "2:31:47"}
33
+ {"current_steps": 33, "total_steps": 109, "loss": 0.6847, "accuracy": 0.640625, "lr": 2.3711521990977554e-06, "epoch": 0.3017142857142857, "percentage": 30.28, "elapsed_time": "1:05:01", "remaining_time": "2:29:44"}
34
+ {"current_steps": 34, "total_steps": 109, "loss": 0.6817, "accuracy": 0.75, "lr": 2.3356006752561658e-06, "epoch": 0.31085714285714283, "percentage": 31.19, "elapsed_time": "1:06:58", "remaining_time": "2:27:44"}
35
+ {"current_steps": 35, "total_steps": 109, "loss": 0.6812, "accuracy": 0.6875, "lr": 2.299355061926096e-06, "epoch": 0.32, "percentage": 32.11, "elapsed_time": "1:08:57", "remaining_time": "2:25:48"}
36
+ {"current_steps": 36, "total_steps": 109, "loss": 0.6822, "accuracy": 0.8125, "lr": 2.262445466430292e-06, "epoch": 0.3291428571428571, "percentage": 33.03, "elapsed_time": "1:10:53", "remaining_time": "2:23:45"}
37
+ {"current_steps": 37, "total_steps": 109, "loss": 0.6809, "accuracy": 0.75, "lr": 2.2249025476265262e-06, "epoch": 0.3382857142857143, "percentage": 33.94, "elapsed_time": "1:12:51", "remaining_time": "2:21:47"}
38
+ {"current_steps": 38, "total_steps": 109, "loss": 0.6797, "accuracy": 0.75, "lr": 2.1867574904409007e-06, "epoch": 0.3474285714285714, "percentage": 34.86, "elapsed_time": "1:14:48", "remaining_time": "2:19:45"}
39
+ {"current_steps": 39, "total_steps": 109, "loss": 0.6836, "accuracy": 0.703125, "lr": 2.1480419799641695e-06, "epoch": 0.3565714285714286, "percentage": 35.78, "elapsed_time": "1:16:40", "remaining_time": "2:17:37"}
40
+ {"current_steps": 40, "total_steps": 109, "loss": 0.6812, "accuracy": 0.75, "lr": 2.1087881751326035e-06, "epoch": 0.3657142857142857, "percentage": 36.7, "elapsed_time": "1:18:39", "remaining_time": "2:15:41"}
41
+ {"current_steps": 41, "total_steps": 109, "loss": 0.6726, "accuracy": 0.78125, "lr": 2.0690286820152535e-06, "epoch": 0.37485714285714283, "percentage": 37.61, "elapsed_time": "1:20:36", "remaining_time": "2:13:41"}
42
+ {"current_steps": 42, "total_steps": 109, "loss": 0.6773, "accuracy": 0.6875, "lr": 2.028796526729806e-06, "epoch": 0.384, "percentage": 38.53, "elapsed_time": "1:22:32", "remaining_time": "2:11:40"}
43
+ {"current_steps": 43, "total_steps": 109, "loss": 0.6759, "accuracy": 0.8125, "lr": 1.9881251280095263e-06, "epoch": 0.3931428571428571, "percentage": 39.45, "elapsed_time": "1:24:23", "remaining_time": "2:09:31"}
44
+ {"current_steps": 44, "total_steps": 109, "loss": 0.6802, "accuracy": 0.640625, "lr": 1.9470482694440755e-06, "epoch": 0.4022857142857143, "percentage": 40.37, "elapsed_time": "1:26:20", "remaining_time": "2:07:32"}
45
+ {"current_steps": 45, "total_steps": 109, "loss": 0.6728, "accuracy": 0.71875, "lr": 1.9056000714172617e-06, "epoch": 0.4114285714285714, "percentage": 41.28, "elapsed_time": "1:28:19", "remaining_time": "2:05:36"}
46
+ {"current_steps": 46, "total_steps": 109, "loss": 0.674, "accuracy": 0.78125, "lr": 1.8638149627650335e-06, "epoch": 0.4205714285714286, "percentage": 42.2, "elapsed_time": "1:30:14", "remaining_time": "2:03:35"}
47
+ {"current_steps": 47, "total_steps": 109, "loss": 0.6725, "accuracy": 0.765625, "lr": 1.8217276521772582e-06, "epoch": 0.4297142857142857, "percentage": 43.12, "elapsed_time": "1:32:12", "remaining_time": "2:01:38"}
48
+ {"current_steps": 48, "total_steps": 109, "loss": 0.6775, "accuracy": 0.6875, "lr": 1.7793730993670408e-06, "epoch": 0.43885714285714283, "percentage": 44.04, "elapsed_time": "1:34:09", "remaining_time": "1:59:39"}
49
+ {"current_steps": 49, "total_steps": 109, "loss": 0.6708, "accuracy": 0.703125, "lr": 1.736786486031531e-06, "epoch": 0.448, "percentage": 44.95, "elapsed_time": "1:36:10", "remaining_time": "1:57:45"}
50
+ {"current_steps": 50, "total_steps": 109, "loss": 0.6798, "accuracy": 0.671875, "lr": 1.6940031866283395e-06, "epoch": 0.45714285714285713, "percentage": 45.87, "elapsed_time": "1:38:05", "remaining_time": "1:55:45"}
51
+ {"current_steps": 51, "total_steps": 109, "loss": 0.6813, "accuracy": 0.703125, "lr": 1.6510587389918377e-06, "epoch": 0.4662857142857143, "percentage": 46.79, "elapsed_time": "1:40:05", "remaining_time": "1:53:50"}
52
+ {"current_steps": 52, "total_steps": 109, "loss": 0.6717, "accuracy": 0.796875, "lr": 1.6079888148137507e-06, "epoch": 0.4754285714285714, "percentage": 47.71, "elapsed_time": "1:42:04", "remaining_time": "1:51:53"}
53
+ {"current_steps": 53, "total_steps": 109, "loss": 0.6722, "accuracy": 0.71875, "lr": 1.564829190012561e-06, "epoch": 0.4845714285714286, "percentage": 48.62, "elapsed_time": "1:44:04", "remaining_time": "1:49:57"}
54
+ {"current_steps": 54, "total_steps": 109, "loss": 0.6777, "accuracy": 0.734375, "lr": 1.521615715016336e-06, "epoch": 0.4937142857142857, "percentage": 49.54, "elapsed_time": "1:46:04", "remaining_time": "1:48:02"}
55
+ {"current_steps": 55, "total_steps": 109, "loss": 0.668, "accuracy": 0.765625, "lr": 1.4783842849836645e-06, "epoch": 0.5028571428571429, "percentage": 50.46, "elapsed_time": "1:48:01", "remaining_time": "1:46:03"}
56
+ {"current_steps": 56, "total_steps": 109, "loss": 0.6724, "accuracy": 0.703125, "lr": 1.435170809987439e-06, "epoch": 0.512, "percentage": 51.38, "elapsed_time": "1:50:03", "remaining_time": "1:44:09"}
57
+ {"current_steps": 57, "total_steps": 109, "loss": 0.6711, "accuracy": 0.765625, "lr": 1.3920111851862494e-06, "epoch": 0.5211428571428571, "percentage": 52.29, "elapsed_time": "1:52:05", "remaining_time": "1:42:15"}
58
+ {"current_steps": 58, "total_steps": 109, "loss": 0.6737, "accuracy": 0.703125, "lr": 1.3489412610081626e-06, "epoch": 0.5302857142857142, "percentage": 53.21, "elapsed_time": "1:54:02", "remaining_time": "1:40:16"}
59
+ {"current_steps": 59, "total_steps": 109, "loss": 0.6747, "accuracy": 0.734375, "lr": 1.3059968133716607e-06, "epoch": 0.5394285714285715, "percentage": 54.13, "elapsed_time": "1:56:03", "remaining_time": "1:38:20"}
60
+ {"current_steps": 60, "total_steps": 109, "loss": 0.6663, "accuracy": 0.78125, "lr": 1.2632135139684691e-06, "epoch": 0.5485714285714286, "percentage": 55.05, "elapsed_time": "1:57:56", "remaining_time": "1:36:19"}
61
+ {"current_steps": 61, "total_steps": 109, "loss": 0.6721, "accuracy": 0.75, "lr": 1.2206269006329595e-06, "epoch": 0.5577142857142857, "percentage": 55.96, "elapsed_time": "1:59:58", "remaining_time": "1:34:24"}
62
+ {"current_steps": 62, "total_steps": 109, "loss": 0.6645, "accuracy": 0.65625, "lr": 1.178272347822742e-06, "epoch": 0.5668571428571428, "percentage": 56.88, "elapsed_time": "2:01:54", "remaining_time": "1:32:24"}
63
+ {"current_steps": 63, "total_steps": 109, "loss": 0.6714, "accuracy": 0.78125, "lr": 1.1361850372349668e-06, "epoch": 0.576, "percentage": 57.8, "elapsed_time": "2:03:55", "remaining_time": "1:30:29"}
64
+ {"current_steps": 64, "total_steps": 109, "loss": 0.6741, "accuracy": 0.671875, "lr": 1.0943999285827381e-06, "epoch": 0.5851428571428572, "percentage": 58.72, "elapsed_time": "2:05:57", "remaining_time": "1:28:34"}
65
+ {"current_steps": 65, "total_steps": 109, "loss": 0.6683, "accuracy": 0.78125, "lr": 1.0529517305559246e-06, "epoch": 0.5942857142857143, "percentage": 59.63, "elapsed_time": "2:07:53", "remaining_time": "1:26:34"}
66
+ {"current_steps": 66, "total_steps": 109, "loss": 0.6714, "accuracy": 0.734375, "lr": 1.0118748719904738e-06, "epoch": 0.6034285714285714, "percentage": 60.55, "elapsed_time": "2:09:48", "remaining_time": "1:24:34"}
67
+ {"current_steps": 67, "total_steps": 109, "loss": 0.6711, "accuracy": 0.734375, "lr": 9.712034732701943e-07, "epoch": 0.6125714285714285, "percentage": 61.47, "elapsed_time": "2:11:43", "remaining_time": "1:22:34"}
68
+ {"current_steps": 68, "total_steps": 109, "loss": 0.6644, "accuracy": 0.765625, "lr": 9.309713179847465e-07, "epoch": 0.6217142857142857, "percentage": 62.39, "elapsed_time": "2:13:38", "remaining_time": "1:20:34"}
69
+ {"current_steps": 69, "total_steps": 109, "loss": 0.6529, "accuracy": 0.8125, "lr": 8.912118248673967e-07, "epoch": 0.6308571428571429, "percentage": 63.3, "elapsed_time": "2:15:39", "remaining_time": "1:18:38"}
70
+ {"current_steps": 70, "total_steps": 109, "loss": 0.6644, "accuracy": 0.765625, "lr": 8.519580200358309e-07, "epoch": 0.64, "percentage": 64.22, "elapsed_time": "2:17:42", "remaining_time": "1:16:43"}
71
+ {"current_steps": 71, "total_steps": 109, "loss": 0.6612, "accuracy": 0.796875, "lr": 8.132425095591e-07, "epoch": 0.6491428571428571, "percentage": 65.14, "elapsed_time": "2:19:39", "remaining_time": "1:14:44"}
72
+ {"current_steps": 72, "total_steps": 109, "loss": 0.6564, "accuracy": 0.78125, "lr": 7.750974523734742e-07, "epoch": 0.6582857142857143, "percentage": 66.06, "elapsed_time": "2:21:34", "remaining_time": "1:12:45"}
73
+ {"current_steps": 73, "total_steps": 109, "loss": 0.662, "accuracy": 0.78125, "lr": 7.375545335697085e-07, "epoch": 0.6674285714285715, "percentage": 66.97, "elapsed_time": "2:23:30", "remaining_time": "1:10:46"}
74
+ {"current_steps": 74, "total_steps": 109, "loss": 0.6528, "accuracy": 0.8125, "lr": 7.00644938073904e-07, "epoch": 0.6765714285714286, "percentage": 67.89, "elapsed_time": "2:25:34", "remaining_time": "1:08:51"}
75
+ {"current_steps": 75, "total_steps": 109, "loss": 0.6607, "accuracy": 0.8125, "lr": 6.643993247438348e-07, "epoch": 0.6857142857142857, "percentage": 68.81, "elapsed_time": "2:27:28", "remaining_time": "1:06:51"}
76
+ {"current_steps": 76, "total_steps": 109, "loss": 0.6577, "accuracy": 0.84375, "lr": 6.288478009022447e-07, "epoch": 0.6948571428571428, "percentage": 69.72, "elapsed_time": "2:29:27", "remaining_time": "1:04:53"}
77
+ {"current_steps": 77, "total_steps": 109, "loss": 0.6645, "accuracy": 0.6875, "lr": 5.940198973282838e-07, "epoch": 0.704, "percentage": 70.64, "elapsed_time": "2:31:26", "remaining_time": "1:02:56"}
78
+ {"current_steps": 78, "total_steps": 109, "loss": 0.6567, "accuracy": 0.765625, "lr": 5.599445437278412e-07, "epoch": 0.7131428571428572, "percentage": 71.56, "elapsed_time": "2:33:23", "remaining_time": "1:00:57"}
79
+ {"current_steps": 79, "total_steps": 109, "loss": 0.6655, "accuracy": 0.78125, "lr": 5.266500447031646e-07, "epoch": 0.7222857142857143, "percentage": 72.48, "elapsed_time": "2:35:21", "remaining_time": "0:58:59"}
80
+ {"current_steps": 80, "total_steps": 109, "loss": 0.657, "accuracy": 0.8125, "lr": 4.941640562417138e-07, "epoch": 0.7314285714285714, "percentage": 73.39, "elapsed_time": "2:37:18", "remaining_time": "0:57:01"}
81
+ {"current_steps": 81, "total_steps": 109, "loss": 0.6587, "accuracy": 0.796875, "lr": 4.6251356274379226e-07, "epoch": 0.7405714285714285, "percentage": 74.31, "elapsed_time": "2:39:14", "remaining_time": "0:55:02"}
82
+ {"current_steps": 82, "total_steps": 109, "loss": 0.6658, "accuracy": 0.75, "lr": 4.317248546080218e-07, "epoch": 0.7497142857142857, "percentage": 75.23, "elapsed_time": "2:41:13", "remaining_time": "0:53:05"}
83
+ {"current_steps": 83, "total_steps": 109, "loss": 0.6629, "accuracy": 0.6875, "lr": 4.018235063932971e-07, "epoch": 0.7588571428571429, "percentage": 76.15, "elapsed_time": "2:43:07", "remaining_time": "0:51:05"}
84
+ {"current_steps": 84, "total_steps": 109, "loss": 0.6603, "accuracy": 0.796875, "lr": 3.7283435557534184e-07, "epoch": 0.768, "percentage": 77.06, "elapsed_time": "2:45:05", "remaining_time": "0:49:08"}
85
+ {"current_steps": 85, "total_steps": 109, "loss": 0.6606, "accuracy": 0.765625, "lr": 3.447814819155292e-07, "epoch": 0.7771428571428571, "percentage": 77.98, "elapsed_time": "2:47:04", "remaining_time": "0:47:10"}
86
+ {"current_steps": 86, "total_steps": 109, "loss": 0.6615, "accuracy": 0.734375, "lr": 3.1768818745908876e-07, "epoch": 0.7862857142857143, "percentage": 78.9, "elapsed_time": "2:49:04", "remaining_time": "0:45:12"}
87
+ {"current_steps": 87, "total_steps": 109, "loss": 0.6599, "accuracy": 0.71875, "lr": 2.915769771793256e-07, "epoch": 0.7954285714285714, "percentage": 79.82, "elapsed_time": "2:51:01", "remaining_time": "0:43:14"}
88
+ {"current_steps": 88, "total_steps": 109, "loss": 0.6714, "accuracy": 0.703125, "lr": 2.6646954028392375e-07, "epoch": 0.8045714285714286, "percentage": 80.73, "elapsed_time": "2:52:57", "remaining_time": "0:41:16"}
89
+ {"current_steps": 89, "total_steps": 109, "loss": 0.6497, "accuracy": 0.859375, "lr": 2.4238673219886385e-07, "epoch": 0.8137142857142857, "percentage": 81.65, "elapsed_time": "2:54:51", "remaining_time": "0:39:17"}
90
+ {"current_steps": 90, "total_steps": 109, "loss": 0.6582, "accuracy": 0.8125, "lr": 2.1934855724491708e-07, "epoch": 0.8228571428571428, "percentage": 82.57, "elapsed_time": "2:56:49", "remaining_time": "0:37:19"}
91
+ {"current_steps": 91, "total_steps": 109, "loss": 0.6614, "accuracy": 0.765625, "lr": 1.9737415202111148e-07, "epoch": 0.832, "percentage": 83.49, "elapsed_time": "2:58:49", "remaining_time": "0:35:22"}
92
+ {"current_steps": 92, "total_steps": 109, "loss": 0.6552, "accuracy": 0.78125, "lr": 1.764817695089636e-07, "epoch": 0.8411428571428572, "percentage": 84.4, "elapsed_time": "3:00:44", "remaining_time": "0:33:23"}
93
+ {"current_steps": 93, "total_steps": 109, "loss": 0.6592, "accuracy": 0.703125, "lr": 1.566887639106911e-07, "epoch": 0.8502857142857143, "percentage": 85.32, "elapsed_time": "3:02:40", "remaining_time": "0:31:25"}
94
+ {"current_steps": 94, "total_steps": 109, "loss": 0.6657, "accuracy": 0.796875, "lr": 1.380115762339877e-07, "epoch": 0.8594285714285714, "percentage": 86.24, "elapsed_time": "3:04:39", "remaining_time": "0:29:27"}
95
+ {"current_steps": 95, "total_steps": 109, "loss": 0.6538, "accuracy": 0.84375, "lr": 1.204657206353459e-07, "epoch": 0.8685714285714285, "percentage": 87.16, "elapsed_time": "3:06:43", "remaining_time": "0:27:31"}
96
+ {"current_steps": 96, "total_steps": 109, "loss": 0.664, "accuracy": 0.796875, "lr": 1.0406577153326192e-07, "epoch": 0.8777142857142857, "percentage": 88.07, "elapsed_time": "3:08:42", "remaining_time": "0:25:33"}
97
+ {"current_steps": 97, "total_steps": 109, "loss": 0.6702, "accuracy": 0.703125, "lr": 8.882535150203569e-08, "epoch": 0.8868571428571429, "percentage": 88.99, "elapsed_time": "3:10:38", "remaining_time": "0:23:35"}
98
+ {"current_steps": 98, "total_steps": 109, "loss": 0.6603, "accuracy": 0.75, "lr": 7.475711995621387e-08, "epoch": 0.896, "percentage": 89.91, "elapsed_time": "3:12:40", "remaining_time": "0:21:37"}
99
+ {"current_steps": 99, "total_steps": 109, "loss": 0.6673, "accuracy": 0.71875, "lr": 6.187276263508168e-08, "epoch": 0.9051428571428571, "percentage": 90.83, "elapsed_time": "3:14:37", "remaining_time": "0:19:39"}
100
+ {"current_steps": 100, "total_steps": 109, "loss": 0.6531, "accuracy": 0.78125, "lr": 5.018298189593368e-08, "epoch": 0.9142857142857143, "percentage": 91.74, "elapsed_time": "3:16:40", "remaining_time": "0:17:42"}
101
+ {"current_steps": 101, "total_steps": 109, "loss": 0.6593, "accuracy": 0.796875, "lr": 3.969748782418991e-08, "epoch": 0.9234285714285714, "percentage": 92.66, "elapsed_time": "3:18:44", "remaining_time": "0:15:44"}
102
+ {"current_steps": 102, "total_steps": 109, "loss": 0.6617, "accuracy": 0.765625, "lr": 3.042499016773881e-08, "epoch": 0.9325714285714286, "percentage": 93.58, "elapsed_time": "3:20:45", "remaining_time": "0:13:46"}
103
+ {"current_steps": 103, "total_steps": 109, "loss": 0.6494, "accuracy": 0.8125, "lr": 2.2373191102207647e-08, "epoch": 0.9417142857142857, "percentage": 94.5, "elapsed_time": "3:22:50", "remaining_time": "0:11:48"}
104
+ {"current_steps": 104, "total_steps": 109, "loss": 0.6539, "accuracy": 0.828125, "lr": 1.5548778833171463e-08, "epoch": 0.9508571428571428, "percentage": 95.41, "elapsed_time": "3:24:51", "remaining_time": "0:09:50"}
105
+ {"current_steps": 105, "total_steps": 109, "loss": 0.6662, "accuracy": 0.71875, "lr": 9.957422040612507e-09, "epoch": 0.96, "percentage": 96.33, "elapsed_time": "3:26:52", "remaining_time": "0:07:52"}
106
+ {"current_steps": 106, "total_steps": 109, "loss": 0.661, "accuracy": 0.78125, "lr": 5.6037651702463e-09, "epoch": 0.9691428571428572, "percentage": 97.25, "elapsed_time": "3:28:47", "remaining_time": "0:05:54"}
107
+ {"current_steps": 107, "total_steps": 109, "loss": 0.6654, "accuracy": 0.796875, "lr": 2.491424575625123e-09, "epoch": 0.9782857142857143, "percentage": 98.17, "elapsed_time": "3:30:45", "remaining_time": "0:03:56"}
108
+ {"current_steps": 108, "total_steps": 109, "loss": 0.6522, "accuracy": 0.8125, "lr": 6.229855142232399e-10, "epoch": 0.9874285714285714, "percentage": 99.08, "elapsed_time": "3:32:44", "remaining_time": "0:01:58"}
109
+ {"current_steps": 109, "total_steps": 109, "loss": 0.6582, "accuracy": 0.796875, "lr": 0.0, "epoch": 0.9965714285714286, "percentage": 100.0, "elapsed_time": "3:34:43", "remaining_time": "0:00:00"}
110
+ {"current_steps": 109, "total_steps": 109, "epoch": 0.9965714285714286, "percentage": 100.0, "elapsed_time": "3:35:05", "remaining_time": "0:00:00"}
trainer_state.json ADDED
@@ -0,0 +1,1677 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9965714285714286,
5
+ "eval_steps": 500,
6
+ "global_step": 109,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.009142857142857144,
13
+ "grad_norm": 0.6255323886871338,
14
+ "learning_rate": 2.9993770144857767e-06,
15
+ "logits/chosen": -2.1389834880828857,
16
+ "logits/rejected": -2.141430139541626,
17
+ "logps/chosen": -19.425989151000977,
18
+ "logps/rejected": -21.582773208618164,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.018285714285714287,
28
+ "grad_norm": 0.6554356217384338,
29
+ "learning_rate": 2.997508575424375e-06,
30
+ "logits/chosen": -2.1365740299224854,
31
+ "logits/rejected": -2.1396265029907227,
32
+ "logps/chosen": -20.762622833251953,
33
+ "logps/rejected": -22.60515785217285,
34
+ "loss": 0.6943,
35
+ "rewards/accuracies": 0.4375,
36
+ "rewards/chosen": -0.0017696216236799955,
37
+ "rewards/margins": -0.002286846749484539,
38
+ "rewards/rejected": 0.0005172253004275262,
39
+ "step": 2
40
+ },
41
+ {
42
+ "epoch": 0.027428571428571427,
43
+ "grad_norm": 0.6278586387634277,
44
+ "learning_rate": 2.9943962348297537e-06,
45
+ "logits/chosen": -2.1222903728485107,
46
+ "logits/rejected": -2.125791549682617,
47
+ "logps/chosen": -21.520599365234375,
48
+ "logps/rejected": -24.2766170501709,
49
+ "loss": 0.6956,
50
+ "rewards/accuracies": 0.390625,
51
+ "rewards/chosen": -0.0007706253090873361,
52
+ "rewards/margins": -0.0048321266658604145,
53
+ "rewards/rejected": 0.004061501007527113,
54
+ "step": 3
55
+ },
56
+ {
57
+ "epoch": 0.036571428571428574,
58
+ "grad_norm": 0.6467044949531555,
59
+ "learning_rate": 2.9900425779593876e-06,
60
+ "logits/chosen": -2.1400036811828613,
61
+ "logits/rejected": -2.1466779708862305,
62
+ "logps/chosen": -19.16310691833496,
63
+ "logps/rejected": -25.431270599365234,
64
+ "loss": 0.6923,
65
+ "rewards/accuracies": 0.515625,
66
+ "rewards/chosen": 0.0013154743937775493,
67
+ "rewards/margins": 0.00192451779730618,
68
+ "rewards/rejected": -0.0006090432871133089,
69
+ "step": 4
70
+ },
71
+ {
72
+ "epoch": 0.045714285714285714,
73
+ "grad_norm": 0.6181118488311768,
74
+ "learning_rate": 2.9844512211668286e-06,
75
+ "logits/chosen": -2.1338605880737305,
76
+ "logits/rejected": -2.137308359146118,
77
+ "logps/chosen": -20.26681137084961,
78
+ "logps/rejected": -21.6939754486084,
79
+ "loss": 0.6914,
80
+ "rewards/accuracies": 0.546875,
81
+ "rewards/chosen": 0.0007669397164136171,
82
+ "rewards/margins": 0.003516831435263157,
83
+ "rewards/rejected": -0.002749891486018896,
84
+ "step": 5
85
+ },
86
+ {
87
+ "epoch": 0.054857142857142854,
88
+ "grad_norm": 0.6369356513023376,
89
+ "learning_rate": 2.977626808897792e-06,
90
+ "logits/chosen": -2.148895740509033,
91
+ "logits/rejected": -2.151296377182007,
92
+ "logps/chosen": -19.613313674926758,
93
+ "logps/rejected": -21.868637084960938,
94
+ "loss": 0.6921,
95
+ "rewards/accuracies": 0.53125,
96
+ "rewards/chosen": 0.0020878687500953674,
97
+ "rewards/margins": 0.002298696432262659,
98
+ "rewards/rejected": -0.00021082756575196981,
99
+ "step": 6
100
+ },
101
+ {
102
+ "epoch": 0.064,
103
+ "grad_norm": 0.643375813961029,
104
+ "learning_rate": 2.9695750098322613e-06,
105
+ "logits/chosen": -2.154219150543213,
106
+ "logits/rejected": -2.1564598083496094,
107
+ "logps/chosen": -19.349716186523438,
108
+ "logps/rejected": -22.341163635253906,
109
+ "loss": 0.6945,
110
+ "rewards/accuracies": 0.484375,
111
+ "rewards/chosen": -0.004500311333686113,
112
+ "rewards/margins": -0.0025411711540073156,
113
+ "rewards/rejected": -0.001959140645340085,
114
+ "step": 7
115
+ },
116
+ {
117
+ "epoch": 0.07314285714285715,
118
+ "grad_norm": 0.6222244501113892,
119
+ "learning_rate": 2.9603025121758102e-06,
120
+ "logits/chosen": -2.126340627670288,
121
+ "logits/rejected": -2.130244731903076,
122
+ "logps/chosen": -19.825477600097656,
123
+ "logps/rejected": -23.661293029785156,
124
+ "loss": 0.6917,
125
+ "rewards/accuracies": 0.578125,
126
+ "rewards/chosen": 0.0011193343671038747,
127
+ "rewards/margins": 0.0031258827075362206,
128
+ "rewards/rejected": -0.0020065484568476677,
129
+ "step": 8
130
+ },
131
+ {
132
+ "epoch": 0.08228571428571428,
133
+ "grad_norm": 0.5961363911628723,
134
+ "learning_rate": 2.9498170181040663e-06,
135
+ "logits/chosen": -2.14841365814209,
136
+ "logits/rejected": -2.14998197555542,
137
+ "logps/chosen": -17.929092407226562,
138
+ "logps/rejected": -19.984407424926758,
139
+ "loss": 0.6922,
140
+ "rewards/accuracies": 0.578125,
141
+ "rewards/chosen": -0.0016110084252431989,
142
+ "rewards/margins": 0.001981835812330246,
143
+ "rewards/rejected": -0.003592844121158123,
144
+ "step": 9
145
+ },
146
+ {
147
+ "epoch": 0.09142857142857143,
148
+ "grad_norm": 0.6394132375717163,
149
+ "learning_rate": 2.938127237364918e-06,
150
+ "logits/chosen": -2.1390151977539062,
151
+ "logits/rejected": -2.14105486869812,
152
+ "logps/chosen": -19.9459228515625,
153
+ "logps/rejected": -21.593914031982422,
154
+ "loss": 0.6949,
155
+ "rewards/accuracies": 0.4375,
156
+ "rewards/chosen": -0.0058130137622356415,
157
+ "rewards/margins": -0.003376076463609934,
158
+ "rewards/rejected": -0.0024369372986257076,
159
+ "step": 10
160
+ },
161
+ {
162
+ "epoch": 0.10057142857142858,
163
+ "grad_norm": 0.637840986251831,
164
+ "learning_rate": 2.925242880043786e-06,
165
+ "logits/chosen": -2.1370978355407715,
166
+ "logits/rejected": -2.1393895149230957,
167
+ "logps/chosen": -20.649080276489258,
168
+ "logps/rejected": -23.88674545288086,
169
+ "loss": 0.6941,
170
+ "rewards/accuracies": 0.484375,
171
+ "rewards/chosen": -0.0013475671876221895,
172
+ "rewards/margins": -0.0018120227614417672,
173
+ "rewards/rejected": 0.0004644556902348995,
174
+ "step": 11
175
+ },
176
+ {
177
+ "epoch": 0.10971428571428571,
178
+ "grad_norm": 0.624940037727356,
179
+ "learning_rate": 2.911174648497964e-06,
180
+ "logits/chosen": -2.1435601711273193,
181
+ "logits/rejected": -2.146998882293701,
182
+ "logps/chosen": -19.336463928222656,
183
+ "logps/rejected": -22.77804183959961,
184
+ "loss": 0.6907,
185
+ "rewards/accuracies": 0.59375,
186
+ "rewards/chosen": -0.0014964112779125571,
187
+ "rewards/margins": 0.005147767253220081,
188
+ "rewards/rejected": -0.006644178181886673,
189
+ "step": 12
190
+ },
191
+ {
192
+ "epoch": 0.11885714285714286,
193
+ "grad_norm": 0.6471104621887207,
194
+ "learning_rate": 2.895934228466738e-06,
195
+ "logits/chosen": -2.136577606201172,
196
+ "logits/rejected": -2.1388959884643555,
197
+ "logps/chosen": -20.625932693481445,
198
+ "logps/rejected": -23.377975463867188,
199
+ "loss": 0.6886,
200
+ "rewards/accuracies": 0.5625,
201
+ "rewards/chosen": 0.0014629564248025417,
202
+ "rewards/margins": 0.00928102433681488,
203
+ "rewards/rejected": -0.007818068377673626,
204
+ "step": 13
205
+ },
206
+ {
207
+ "epoch": 0.128,
208
+ "grad_norm": 0.629192054271698,
209
+ "learning_rate": 2.879534279364654e-06,
210
+ "logits/chosen": -2.1251070499420166,
211
+ "logits/rejected": -2.1296639442443848,
212
+ "logps/chosen": -17.793655395507812,
213
+ "logps/rejected": -24.011507034301758,
214
+ "loss": 0.6917,
215
+ "rewards/accuracies": 0.515625,
216
+ "rewards/chosen": -0.000675417366437614,
217
+ "rewards/margins": 0.0030865983571857214,
218
+ "rewards/rejected": -0.0037620156072080135,
219
+ "step": 14
220
+ },
221
+ {
222
+ "epoch": 0.13714285714285715,
223
+ "grad_norm": 0.659120500087738,
224
+ "learning_rate": 2.8619884237660125e-06,
225
+ "logits/chosen": -2.1358160972595215,
226
+ "logits/rejected": -2.142625331878662,
227
+ "logps/chosen": -18.37673568725586,
228
+ "logps/rejected": -25.251014709472656,
229
+ "loss": 0.6907,
230
+ "rewards/accuracies": 0.578125,
231
+ "rewards/chosen": -0.00043936213478446007,
232
+ "rewards/margins": 0.004951969254761934,
233
+ "rewards/rejected": -0.005391330923885107,
234
+ "step": 15
235
+ },
236
+ {
237
+ "epoch": 0.1462857142857143,
238
+ "grad_norm": 0.6336076259613037,
239
+ "learning_rate": 2.843311236089309e-06,
240
+ "logits/chosen": -2.1342644691467285,
241
+ "logits/rejected": -2.1355390548706055,
242
+ "logps/chosen": -20.63397216796875,
243
+ "logps/rejected": -21.67581558227539,
244
+ "loss": 0.6923,
245
+ "rewards/accuracies": 0.578125,
246
+ "rewards/chosen": -0.006054366007447243,
247
+ "rewards/margins": 0.0018282074015587568,
248
+ "rewards/rejected": -0.007882573641836643,
249
+ "step": 16
250
+ },
251
+ {
252
+ "epoch": 0.15542857142857142,
253
+ "grad_norm": 0.6278834342956543,
254
+ "learning_rate": 2.8235182304910364e-06,
255
+ "logits/chosen": -2.1471428871154785,
256
+ "logits/rejected": -2.148350477218628,
257
+ "logps/chosen": -21.62627410888672,
258
+ "logps/rejected": -22.867630004882812,
259
+ "loss": 0.6914,
260
+ "rewards/accuracies": 0.59375,
261
+ "rewards/chosen": -0.002310897456482053,
262
+ "rewards/margins": 0.00360050518065691,
263
+ "rewards/rejected": -0.005911402404308319,
264
+ "step": 17
265
+ },
266
+ {
267
+ "epoch": 0.16457142857142856,
268
+ "grad_norm": 0.6396936178207397,
269
+ "learning_rate": 2.8026258479788888e-06,
270
+ "logits/chosen": -2.131674289703369,
271
+ "logits/rejected": -2.1344425678253174,
272
+ "logps/chosen": -17.968589782714844,
273
+ "logps/rejected": -23.94507598876953,
274
+ "loss": 0.6891,
275
+ "rewards/accuracies": 0.640625,
276
+ "rewards/chosen": -0.0026061469689011574,
277
+ "rewards/margins": 0.008287503384053707,
278
+ "rewards/rejected": -0.010893651284277439,
279
+ "step": 18
280
+ },
281
+ {
282
+ "epoch": 0.1737142857142857,
283
+ "grad_norm": 0.6427000164985657,
284
+ "learning_rate": 2.780651442755083e-06,
285
+ "logits/chosen": -2.1325266361236572,
286
+ "logits/rejected": -2.1359243392944336,
287
+ "logps/chosen": -19.952186584472656,
288
+ "logps/rejected": -20.840421676635742,
289
+ "loss": 0.6887,
290
+ "rewards/accuracies": 0.5625,
291
+ "rewards/chosen": 0.002477221190929413,
292
+ "rewards/margins": 0.009127501398324966,
293
+ "rewards/rejected": -0.006650280207395554,
294
+ "step": 19
295
+ },
296
+ {
297
+ "epoch": 0.18285714285714286,
298
+ "grad_norm": 0.634149968624115,
299
+ "learning_rate": 2.7576132678011365e-06,
300
+ "logits/chosen": -2.137594223022461,
301
+ "logits/rejected": -2.1397337913513184,
302
+ "logps/chosen": -20.24038314819336,
303
+ "logps/rejected": -21.273605346679688,
304
+ "loss": 0.6886,
305
+ "rewards/accuracies": 0.609375,
306
+ "rewards/chosen": -0.0009703578543849289,
307
+ "rewards/margins": 0.009282448329031467,
308
+ "rewards/rejected": -0.010252806358039379,
309
+ "step": 20
310
+ },
311
+ {
312
+ "epoch": 0.192,
313
+ "grad_norm": 0.7092023491859436,
314
+ "learning_rate": 2.7335304597160764e-06,
315
+ "logits/chosen": -2.1394314765930176,
316
+ "logits/rejected": -2.1454715728759766,
317
+ "logps/chosen": -21.92709732055664,
318
+ "logps/rejected": -28.169654846191406,
319
+ "loss": 0.6845,
320
+ "rewards/accuracies": 0.6875,
321
+ "rewards/chosen": 0.0013219192624092102,
322
+ "rewards/margins": 0.017537159845232964,
323
+ "rewards/rejected": -0.016215242445468903,
324
+ "step": 21
325
+ },
326
+ {
327
+ "epoch": 0.20114285714285715,
328
+ "grad_norm": 0.6428853869438171,
329
+ "learning_rate": 2.7084230228206746e-06,
330
+ "logits/chosen": -2.1274845600128174,
331
+ "logits/rejected": -2.128504991531372,
332
+ "logps/chosen": -19.982959747314453,
333
+ "logps/rejected": -23.259571075439453,
334
+ "loss": 0.688,
335
+ "rewards/accuracies": 0.671875,
336
+ "rewards/chosen": -0.0038342936895787716,
337
+ "rewards/margins": 0.010476754978299141,
338
+ "rewards/rejected": -0.0143110491335392,
339
+ "step": 22
340
+ },
341
+ {
342
+ "epoch": 0.2102857142857143,
343
+ "grad_norm": 0.6467615962028503,
344
+ "learning_rate": 2.6823118125409112e-06,
345
+ "logits/chosen": -2.1434879302978516,
346
+ "logits/rejected": -2.14566707611084,
347
+ "logps/chosen": -20.100147247314453,
348
+ "logps/rejected": -23.975025177001953,
349
+ "loss": 0.6897,
350
+ "rewards/accuracies": 0.625,
351
+ "rewards/chosen": -0.001528523163869977,
352
+ "rewards/margins": 0.0071428027004003525,
353
+ "rewards/rejected": -0.008671325631439686,
354
+ "step": 23
355
+ },
356
+ {
357
+ "epoch": 0.21942857142857142,
358
+ "grad_norm": 0.6638103127479553,
359
+ "learning_rate": 2.6552185180844704e-06,
360
+ "logits/chosen": -2.1213717460632324,
361
+ "logits/rejected": -2.1236109733581543,
362
+ "logps/chosen": -21.576557159423828,
363
+ "logps/rejected": -23.23206329345703,
364
+ "loss": 0.6861,
365
+ "rewards/accuracies": 0.671875,
366
+ "rewards/chosen": 0.0032195569947361946,
367
+ "rewards/margins": 0.01447179913520813,
368
+ "rewards/rejected": -0.011252242140471935,
369
+ "step": 24
370
+ },
371
+ {
372
+ "epoch": 0.22857142857142856,
373
+ "grad_norm": 0.6348288655281067,
374
+ "learning_rate": 2.6271656444246578e-06,
375
+ "logits/chosen": -2.1333892345428467,
376
+ "logits/rejected": -2.1361846923828125,
377
+ "logps/chosen": -19.42316436767578,
378
+ "logps/rejected": -22.707563400268555,
379
+ "loss": 0.6831,
380
+ "rewards/accuracies": 0.71875,
381
+ "rewards/chosen": 0.0022394396364688873,
382
+ "rewards/margins": 0.020372504368424416,
383
+ "rewards/rejected": -0.018133066594600677,
384
+ "step": 25
385
+ },
386
+ {
387
+ "epoch": 0.2377142857142857,
388
+ "grad_norm": 0.6526222825050354,
389
+ "learning_rate": 2.598176493606703e-06,
390
+ "logits/chosen": -2.1356377601623535,
391
+ "logits/rejected": -2.1370201110839844,
392
+ "logps/chosen": -20.537616729736328,
393
+ "logps/rejected": -24.898578643798828,
394
+ "loss": 0.6859,
395
+ "rewards/accuracies": 0.5625,
396
+ "rewards/chosen": -0.002131823683157563,
397
+ "rewards/margins": 0.014900727197527885,
398
+ "rewards/rejected": -0.017032550647854805,
399
+ "step": 26
400
+ },
401
+ {
402
+ "epoch": 0.24685714285714286,
403
+ "grad_norm": 0.6682783365249634,
404
+ "learning_rate": 2.568275145391978e-06,
405
+ "logits/chosen": -2.1460518836975098,
406
+ "logits/rejected": -2.1491003036499023,
407
+ "logps/chosen": -20.905759811401367,
408
+ "logps/rejected": -24.251680374145508,
409
+ "loss": 0.6857,
410
+ "rewards/accuracies": 0.703125,
411
+ "rewards/chosen": 0.0005356475012376904,
412
+ "rewards/margins": 0.015149888582527637,
413
+ "rewards/rejected": -0.014614241197705269,
414
+ "step": 27
415
+ },
416
+ {
417
+ "epoch": 0.256,
418
+ "grad_norm": 0.6456180214881897,
419
+ "learning_rate": 2.5374864372562077e-06,
420
+ "logits/chosen": -2.1365909576416016,
421
+ "logits/rejected": -2.1375560760498047,
422
+ "logps/chosen": -21.477279663085938,
423
+ "logps/rejected": -22.589874267578125,
424
+ "loss": 0.6853,
425
+ "rewards/accuracies": 0.703125,
426
+ "rewards/chosen": -0.0013170776655897498,
427
+ "rewards/margins": 0.01594378799200058,
428
+ "rewards/rejected": -0.01726086437702179,
429
+ "step": 28
430
+ },
431
+ {
432
+ "epoch": 0.2651428571428571,
433
+ "grad_norm": 0.6599003672599792,
434
+ "learning_rate": 2.505835943758286e-06,
435
+ "logits/chosen": -2.1302995681762695,
436
+ "logits/rejected": -2.1338701248168945,
437
+ "logps/chosen": -20.774627685546875,
438
+ "logps/rejected": -24.625228881835938,
439
+ "loss": 0.6838,
440
+ "rewards/accuracies": 0.625,
441
+ "rewards/chosen": 0.0004698322154581547,
442
+ "rewards/margins": 0.019142411649227142,
443
+ "rewards/rejected": -0.018672579899430275,
444
+ "step": 29
445
+ },
446
+ {
447
+ "epoch": 0.2742857142857143,
448
+ "grad_norm": 0.6650639176368713,
449
+ "learning_rate": 2.4733499552968357e-06,
450
+ "logits/chosen": -2.1260218620300293,
451
+ "logits/rejected": -2.128187894821167,
452
+ "logps/chosen": -20.981136322021484,
453
+ "logps/rejected": -23.799392700195312,
454
+ "loss": 0.683,
455
+ "rewards/accuracies": 0.6875,
456
+ "rewards/chosen": 0.00021876831306144595,
457
+ "rewards/margins": 0.020755982026457787,
458
+ "rewards/rejected": -0.02053721249103546,
459
+ "step": 30
460
+ },
461
+ {
462
+ "epoch": 0.2834285714285714,
463
+ "grad_norm": 0.6870555877685547,
464
+ "learning_rate": 2.440055456272159e-06,
465
+ "logits/chosen": -2.1325454711914062,
466
+ "logits/rejected": -2.1314170360565186,
467
+ "logps/chosen": -20.572166442871094,
468
+ "logps/rejected": -19.940898895263672,
469
+ "loss": 0.6861,
470
+ "rewards/accuracies": 0.65625,
471
+ "rewards/chosen": -0.00320088560692966,
472
+ "rewards/margins": 0.014535932801663876,
473
+ "rewards/rejected": -0.01773681864142418,
474
+ "step": 31
475
+ },
476
+ {
477
+ "epoch": 0.2925714285714286,
478
+ "grad_norm": 0.6859702467918396,
479
+ "learning_rate": 2.4059801026717166e-06,
480
+ "logits/chosen": -2.138218402862549,
481
+ "logits/rejected": -2.1400537490844727,
482
+ "logps/chosen": -20.59479331970215,
483
+ "logps/rejected": -24.294113159179688,
484
+ "loss": 0.6824,
485
+ "rewards/accuracies": 0.75,
486
+ "rewards/chosen": 0.0018003403674811125,
487
+ "rewards/margins": 0.02209661900997162,
488
+ "rewards/rejected": -0.02029627561569214,
489
+ "step": 32
490
+ },
491
+ {
492
+ "epoch": 0.3017142857142857,
493
+ "grad_norm": 0.6709543466567993,
494
+ "learning_rate": 2.3711521990977554e-06,
495
+ "logits/chosen": -2.134920120239258,
496
+ "logits/rejected": -2.137303352355957,
497
+ "logps/chosen": -21.195552825927734,
498
+ "logps/rejected": -24.645339965820312,
499
+ "loss": 0.6847,
500
+ "rewards/accuracies": 0.640625,
501
+ "rewards/chosen": -0.00019013590645045042,
502
+ "rewards/margins": 0.0173664353787899,
503
+ "rewards/rejected": -0.01755657233297825,
504
+ "step": 33
505
+ },
506
+ {
507
+ "epoch": 0.31085714285714283,
508
+ "grad_norm": 0.6602835655212402,
509
+ "learning_rate": 2.3356006752561658e-06,
510
+ "logits/chosen": -2.1185295581817627,
511
+ "logits/rejected": -2.122647762298584,
512
+ "logps/chosen": -17.77151870727539,
513
+ "logps/rejected": -25.318552017211914,
514
+ "loss": 0.6817,
515
+ "rewards/accuracies": 0.75,
516
+ "rewards/chosen": 0.0011812887387350202,
517
+ "rewards/margins": 0.023368019610643387,
518
+ "rewards/rejected": -0.022186731919646263,
519
+ "step": 34
520
+ },
521
+ {
522
+ "epoch": 0.32,
523
+ "grad_norm": 0.6702331900596619,
524
+ "learning_rate": 2.299355061926096e-06,
525
+ "logits/chosen": -2.1439552307128906,
526
+ "logits/rejected": -2.148176670074463,
527
+ "logps/chosen": -19.662979125976562,
528
+ "logps/rejected": -25.61541748046875,
529
+ "loss": 0.6812,
530
+ "rewards/accuracies": 0.6875,
531
+ "rewards/chosen": -0.0001765764318406582,
532
+ "rewards/margins": 0.02442769892513752,
533
+ "rewards/rejected": -0.024604275822639465,
534
+ "step": 35
535
+ },
536
+ {
537
+ "epoch": 0.3291428571428571,
538
+ "grad_norm": 0.6621116399765015,
539
+ "learning_rate": 2.262445466430292e-06,
540
+ "logits/chosen": -2.138071060180664,
541
+ "logits/rejected": -2.139529228210449,
542
+ "logps/chosen": -19.943336486816406,
543
+ "logps/rejected": -23.18177032470703,
544
+ "loss": 0.6822,
545
+ "rewards/accuracies": 0.8125,
546
+ "rewards/chosen": -0.006531356833875179,
547
+ "rewards/margins": 0.022531913593411446,
548
+ "rewards/rejected": -0.0290632676333189,
549
+ "step": 36
550
+ },
551
+ {
552
+ "epoch": 0.3382857142857143,
553
+ "grad_norm": 0.7125285863876343,
554
+ "learning_rate": 2.2249025476265262e-06,
555
+ "logits/chosen": -2.1278233528137207,
556
+ "logits/rejected": -2.1309316158294678,
557
+ "logps/chosen": -21.678462982177734,
558
+ "logps/rejected": -23.819469451904297,
559
+ "loss": 0.6809,
560
+ "rewards/accuracies": 0.75,
561
+ "rewards/chosen": -0.003998810425400734,
562
+ "rewards/margins": 0.025094730779528618,
563
+ "rewards/rejected": -0.029093541204929352,
564
+ "step": 37
565
+ },
566
+ {
567
+ "epoch": 0.3474285714285714,
568
+ "grad_norm": 0.6747680902481079,
569
+ "learning_rate": 2.1867574904409007e-06,
570
+ "logits/chosen": -2.128553628921509,
571
+ "logits/rejected": -2.1311511993408203,
572
+ "logps/chosen": -18.51136589050293,
573
+ "logps/rejected": -24.083953857421875,
574
+ "loss": 0.6797,
575
+ "rewards/accuracies": 0.75,
576
+ "rewards/chosen": 0.00025194010231643915,
577
+ "rewards/margins": 0.027810033410787582,
578
+ "rewards/rejected": -0.027558093890547752,
579
+ "step": 38
580
+ },
581
+ {
582
+ "epoch": 0.3565714285714286,
583
+ "grad_norm": 0.6505147218704224,
584
+ "learning_rate": 2.1480419799641695e-06,
585
+ "logits/chosen": -2.1170382499694824,
586
+ "logits/rejected": -2.1211585998535156,
587
+ "logps/chosen": -18.79464340209961,
588
+ "logps/rejected": -23.59588050842285,
589
+ "loss": 0.6836,
590
+ "rewards/accuracies": 0.703125,
591
+ "rewards/chosen": -0.006827862001955509,
592
+ "rewards/margins": 0.019631091505289078,
593
+ "rewards/rejected": -0.026458950713276863,
594
+ "step": 39
595
+ },
596
+ {
597
+ "epoch": 0.3657142857142857,
598
+ "grad_norm": 0.6365678310394287,
599
+ "learning_rate": 2.1087881751326035e-06,
600
+ "logits/chosen": -2.1277003288269043,
601
+ "logits/rejected": -2.1313459873199463,
602
+ "logps/chosen": -20.50314712524414,
603
+ "logps/rejected": -22.63813018798828,
604
+ "loss": 0.6812,
605
+ "rewards/accuracies": 0.75,
606
+ "rewards/chosen": 0.0014799695927649736,
607
+ "rewards/margins": 0.024493195116519928,
608
+ "rewards/rejected": -0.023013222962617874,
609
+ "step": 40
610
+ },
611
+ {
612
+ "epoch": 0.37485714285714283,
613
+ "grad_norm": 0.6673828959465027,
614
+ "learning_rate": 2.0690286820152535e-06,
615
+ "logits/chosen": -2.1289217472076416,
616
+ "logits/rejected": -2.131746768951416,
617
+ "logps/chosen": -20.128999710083008,
618
+ "logps/rejected": -23.057846069335938,
619
+ "loss": 0.6726,
620
+ "rewards/accuracies": 0.78125,
621
+ "rewards/chosen": 0.004846580792218447,
622
+ "rewards/margins": 0.042193807661533356,
623
+ "rewards/rejected": -0.037347227334976196,
624
+ "step": 41
625
+ },
626
+ {
627
+ "epoch": 0.384,
628
+ "grad_norm": 0.6725500226020813,
629
+ "learning_rate": 2.028796526729806e-06,
630
+ "logits/chosen": -2.121724843978882,
631
+ "logits/rejected": -2.125291347503662,
632
+ "logps/chosen": -19.95975112915039,
633
+ "logps/rejected": -24.067777633666992,
634
+ "loss": 0.6773,
635
+ "rewards/accuracies": 0.6875,
636
+ "rewards/chosen": 0.000510699232108891,
637
+ "rewards/margins": 0.03263135999441147,
638
+ "rewards/rejected": -0.03212066367268562,
639
+ "step": 42
640
+ },
641
+ {
642
+ "epoch": 0.3931428571428571,
643
+ "grad_norm": 0.7096243500709534,
644
+ "learning_rate": 1.9881251280095263e-06,
645
+ "logits/chosen": -2.12835693359375,
646
+ "logits/rejected": -2.1325571537017822,
647
+ "logps/chosen": -19.971481323242188,
648
+ "logps/rejected": -24.266193389892578,
649
+ "loss": 0.6759,
650
+ "rewards/accuracies": 0.8125,
651
+ "rewards/chosen": 0.0037878660950809717,
652
+ "rewards/margins": 0.03536036238074303,
653
+ "rewards/rejected": -0.03157249093055725,
654
+ "step": 43
655
+ },
656
+ {
657
+ "epoch": 0.4022857142857143,
658
+ "grad_norm": 0.6290874481201172,
659
+ "learning_rate": 1.9470482694440755e-06,
660
+ "logits/chosen": -2.139394760131836,
661
+ "logits/rejected": -2.1419851779937744,
662
+ "logps/chosen": -17.84711265563965,
663
+ "logps/rejected": -22.699108123779297,
664
+ "loss": 0.6802,
665
+ "rewards/accuracies": 0.640625,
666
+ "rewards/chosen": -0.0038270740769803524,
667
+ "rewards/margins": 0.027179870754480362,
668
+ "rewards/rejected": -0.031006945297122,
669
+ "step": 44
670
+ },
671
+ {
672
+ "epoch": 0.4114285714285714,
673
+ "grad_norm": 0.6832275986671448,
674
+ "learning_rate": 1.9056000714172617e-06,
675
+ "logits/chosen": -2.138123035430908,
676
+ "logits/rejected": -2.142123222351074,
677
+ "logps/chosen": -19.396350860595703,
678
+ "logps/rejected": -22.903085708618164,
679
+ "loss": 0.6728,
680
+ "rewards/accuracies": 0.71875,
681
+ "rewards/chosen": -0.000809194054454565,
682
+ "rewards/margins": 0.042555954307317734,
683
+ "rewards/rejected": -0.04336514696478844,
684
+ "step": 45
685
+ },
686
+ {
687
+ "epoch": 0.4205714285714286,
688
+ "grad_norm": 0.6851588487625122,
689
+ "learning_rate": 1.8638149627650335e-06,
690
+ "logits/chosen": -2.1379756927490234,
691
+ "logits/rejected": -2.1380934715270996,
692
+ "logps/chosen": -21.08904266357422,
693
+ "logps/rejected": -23.63918685913086,
694
+ "loss": 0.674,
695
+ "rewards/accuracies": 0.78125,
696
+ "rewards/chosen": 0.001778717152774334,
697
+ "rewards/margins": 0.03978656232357025,
698
+ "rewards/rejected": -0.03800784423947334,
699
+ "step": 46
700
+ },
701
+ {
702
+ "epoch": 0.4297142857142857,
703
+ "grad_norm": 0.6948539614677429,
704
+ "learning_rate": 1.8217276521772582e-06,
705
+ "logits/chosen": -2.1302433013916016,
706
+ "logits/rejected": -2.1331663131713867,
707
+ "logps/chosen": -20.23948860168457,
708
+ "logps/rejected": -23.1295223236084,
709
+ "loss": 0.6725,
710
+ "rewards/accuracies": 0.765625,
711
+ "rewards/chosen": -0.004014923237264156,
712
+ "rewards/margins": 0.042658429592847824,
713
+ "rewards/rejected": -0.046673357486724854,
714
+ "step": 47
715
+ },
716
+ {
717
+ "epoch": 0.43885714285714283,
718
+ "grad_norm": 0.6569979190826416,
719
+ "learning_rate": 1.7793730993670408e-06,
720
+ "logits/chosen": -2.1294007301330566,
721
+ "logits/rejected": -2.1324024200439453,
722
+ "logps/chosen": -20.591182708740234,
723
+ "logps/rejected": -23.661182403564453,
724
+ "loss": 0.6775,
725
+ "rewards/accuracies": 0.6875,
726
+ "rewards/chosen": -0.004753855522722006,
727
+ "rewards/margins": 0.03247044235467911,
728
+ "rewards/rejected": -0.03722430020570755,
729
+ "step": 48
730
+ },
731
+ {
732
+ "epoch": 0.448,
733
+ "grad_norm": 0.6771251559257507,
734
+ "learning_rate": 1.736786486031531e-06,
735
+ "logits/chosen": -2.126737117767334,
736
+ "logits/rejected": -2.1294384002685547,
737
+ "logps/chosen": -20.071245193481445,
738
+ "logps/rejected": -22.262264251708984,
739
+ "loss": 0.6708,
740
+ "rewards/accuracies": 0.703125,
741
+ "rewards/chosen": 0.005568951368331909,
742
+ "rewards/margins": 0.04644326865673065,
743
+ "rewards/rejected": -0.04087432101368904,
744
+ "step": 49
745
+ },
746
+ {
747
+ "epoch": 0.45714285714285713,
748
+ "grad_norm": 0.6473885774612427,
749
+ "learning_rate": 1.6940031866283395e-06,
750
+ "logits/chosen": -2.1336934566497803,
751
+ "logits/rejected": -2.1349339485168457,
752
+ "logps/chosen": -19.305700302124023,
753
+ "logps/rejected": -21.597030639648438,
754
+ "loss": 0.6798,
755
+ "rewards/accuracies": 0.671875,
756
+ "rewards/chosen": -0.002087415661662817,
757
+ "rewards/margins": 0.02759050950407982,
758
+ "rewards/rejected": -0.029677925631403923,
759
+ "step": 50
760
+ },
761
+ {
762
+ "epoch": 0.4662857142857143,
763
+ "grad_norm": 0.6721633672714233,
764
+ "learning_rate": 1.6510587389918377e-06,
765
+ "logits/chosen": -2.1234569549560547,
766
+ "logits/rejected": -2.1260986328125,
767
+ "logps/chosen": -20.71694564819336,
768
+ "logps/rejected": -24.932897567749023,
769
+ "loss": 0.6813,
770
+ "rewards/accuracies": 0.703125,
771
+ "rewards/chosen": -0.011087682098150253,
772
+ "rewards/margins": 0.024922657757997513,
773
+ "rewards/rejected": -0.036010339856147766,
774
+ "step": 51
775
+ },
776
+ {
777
+ "epoch": 0.4754285714285714,
778
+ "grad_norm": 0.7036443948745728,
779
+ "learning_rate": 1.6079888148137507e-06,
780
+ "logits/chosen": -2.1245672702789307,
781
+ "logits/rejected": -2.1277780532836914,
782
+ "logps/chosen": -21.870973587036133,
783
+ "logps/rejected": -24.941219329833984,
784
+ "loss": 0.6717,
785
+ "rewards/accuracies": 0.796875,
786
+ "rewards/chosen": -0.0029917103238403797,
787
+ "rewards/margins": 0.04497722536325455,
788
+ "rewards/rejected": -0.04796893894672394,
789
+ "step": 52
790
+ },
791
+ {
792
+ "epoch": 0.4845714285714286,
793
+ "grad_norm": 0.68109130859375,
794
+ "learning_rate": 1.564829190012561e-06,
795
+ "logits/chosen": -2.1461524963378906,
796
+ "logits/rejected": -2.1497020721435547,
797
+ "logps/chosen": -21.60137176513672,
798
+ "logps/rejected": -25.58949089050293,
799
+ "loss": 0.6722,
800
+ "rewards/accuracies": 0.71875,
801
+ "rewards/chosen": -0.005693153943866491,
802
+ "rewards/margins": 0.043281424790620804,
803
+ "rewards/rejected": -0.04897458106279373,
804
+ "step": 53
805
+ },
806
+ {
807
+ "epoch": 0.4937142857142857,
808
+ "grad_norm": 0.6599106192588806,
809
+ "learning_rate": 1.521615715016336e-06,
810
+ "logits/chosen": -2.140432357788086,
811
+ "logits/rejected": -2.1406521797180176,
812
+ "logps/chosen": -20.149822235107422,
813
+ "logps/rejected": -22.249767303466797,
814
+ "loss": 0.6777,
815
+ "rewards/accuracies": 0.734375,
816
+ "rewards/chosen": -0.00302593014203012,
817
+ "rewards/margins": 0.0317457839846611,
818
+ "rewards/rejected": -0.03477171063423157,
819
+ "step": 54
820
+ },
821
+ {
822
+ "epoch": 0.5028571428571429,
823
+ "grad_norm": 0.7043587565422058,
824
+ "learning_rate": 1.4783842849836645e-06,
825
+ "logits/chosen": -2.1249215602874756,
826
+ "logits/rejected": -2.1282291412353516,
827
+ "logps/chosen": -20.65789031982422,
828
+ "logps/rejected": -23.718164443969727,
829
+ "loss": 0.668,
830
+ "rewards/accuracies": 0.765625,
831
+ "rewards/chosen": 0.0033555193804204464,
832
+ "rewards/margins": 0.05225363373756409,
833
+ "rewards/rejected": -0.048898108303546906,
834
+ "step": 55
835
+ },
836
+ {
837
+ "epoch": 0.512,
838
+ "grad_norm": 0.6670368313789368,
839
+ "learning_rate": 1.435170809987439e-06,
840
+ "logits/chosen": -2.122504949569702,
841
+ "logits/rejected": -2.126192569732666,
842
+ "logps/chosen": -20.417633056640625,
843
+ "logps/rejected": -24.24279022216797,
844
+ "loss": 0.6724,
845
+ "rewards/accuracies": 0.703125,
846
+ "rewards/chosen": -0.006906700320541859,
847
+ "rewards/margins": 0.04406347870826721,
848
+ "rewards/rejected": -0.0509701743721962,
849
+ "step": 56
850
+ },
851
+ {
852
+ "epoch": 0.5211428571428571,
853
+ "grad_norm": 0.6748237013816833,
854
+ "learning_rate": 1.3920111851862494e-06,
855
+ "logits/chosen": -2.1295788288116455,
856
+ "logits/rejected": -2.132110834121704,
857
+ "logps/chosen": -20.603960037231445,
858
+ "logps/rejected": -23.54561996459961,
859
+ "loss": 0.6711,
860
+ "rewards/accuracies": 0.765625,
861
+ "rewards/chosen": -0.011474862694740295,
862
+ "rewards/margins": 0.04592112824320793,
863
+ "rewards/rejected": -0.057395994663238525,
864
+ "step": 57
865
+ },
866
+ {
867
+ "epoch": 0.5302857142857142,
868
+ "grad_norm": 0.6662198901176453,
869
+ "learning_rate": 1.3489412610081626e-06,
870
+ "logits/chosen": -2.1300594806671143,
871
+ "logits/rejected": -2.132218837738037,
872
+ "logps/chosen": -20.97345542907715,
873
+ "logps/rejected": -24.11111068725586,
874
+ "loss": 0.6737,
875
+ "rewards/accuracies": 0.703125,
876
+ "rewards/chosen": -0.015223701484501362,
877
+ "rewards/margins": 0.04077855125069618,
878
+ "rewards/rejected": -0.05600225552916527,
879
+ "step": 58
880
+ },
881
+ {
882
+ "epoch": 0.5394285714285715,
883
+ "grad_norm": 0.6957614421844482,
884
+ "learning_rate": 1.3059968133716607e-06,
885
+ "logits/chosen": -2.132567882537842,
886
+ "logits/rejected": -2.132495880126953,
887
+ "logps/chosen": -21.196874618530273,
888
+ "logps/rejected": -23.613279342651367,
889
+ "loss": 0.6747,
890
+ "rewards/accuracies": 0.734375,
891
+ "rewards/chosen": -0.008941511623561382,
892
+ "rewards/margins": 0.03851575776934624,
893
+ "rewards/rejected": -0.047457270324230194,
894
+ "step": 59
895
+ },
896
+ {
897
+ "epoch": 0.5485714285714286,
898
+ "grad_norm": 0.7162770628929138,
899
+ "learning_rate": 1.2632135139684691e-06,
900
+ "logits/chosen": -2.1271543502807617,
901
+ "logits/rejected": -2.130880117416382,
902
+ "logps/chosen": -20.923919677734375,
903
+ "logps/rejected": -25.65717315673828,
904
+ "loss": 0.6663,
905
+ "rewards/accuracies": 0.78125,
906
+ "rewards/chosen": 0.009123304858803749,
907
+ "rewards/margins": 0.05547190085053444,
908
+ "rewards/rejected": -0.04634860157966614,
909
+ "step": 60
910
+ },
911
+ {
912
+ "epoch": 0.5577142857142857,
913
+ "grad_norm": 0.6612498164176941,
914
+ "learning_rate": 1.2206269006329595e-06,
915
+ "logits/chosen": -2.116666316986084,
916
+ "logits/rejected": -2.1212408542633057,
917
+ "logps/chosen": -20.269481658935547,
918
+ "logps/rejected": -24.660919189453125,
919
+ "loss": 0.6721,
920
+ "rewards/accuracies": 0.75,
921
+ "rewards/chosen": -0.013498620130121708,
922
+ "rewards/margins": 0.0441305935382843,
923
+ "rewards/rejected": -0.057629212737083435,
924
+ "step": 61
925
+ },
926
+ {
927
+ "epoch": 0.5668571428571428,
928
+ "grad_norm": 0.6904810667037964,
929
+ "learning_rate": 1.178272347822742e-06,
930
+ "logits/chosen": -2.1359639167785645,
931
+ "logits/rejected": -2.137200355529785,
932
+ "logps/chosen": -21.87899398803711,
933
+ "logps/rejected": -22.924833297729492,
934
+ "loss": 0.6645,
935
+ "rewards/accuracies": 0.65625,
936
+ "rewards/chosen": 0.008273976854979992,
937
+ "rewards/margins": 0.06065124645829201,
938
+ "rewards/rejected": -0.05237726867198944,
939
+ "step": 62
940
+ },
941
+ {
942
+ "epoch": 0.576,
943
+ "grad_norm": 0.6719346642494202,
944
+ "learning_rate": 1.1361850372349668e-06,
945
+ "logits/chosen": -2.134481906890869,
946
+ "logits/rejected": -2.136564016342163,
947
+ "logps/chosen": -20.749956130981445,
948
+ "logps/rejected": -24.487869262695312,
949
+ "loss": 0.6714,
950
+ "rewards/accuracies": 0.78125,
951
+ "rewards/chosen": -0.005040324293076992,
952
+ "rewards/margins": 0.045264218002557755,
953
+ "rewards/rejected": -0.05030454322695732,
954
+ "step": 63
955
+ },
956
+ {
957
+ "epoch": 0.5851428571428572,
958
+ "grad_norm": 0.6895278692245483,
959
+ "learning_rate": 1.0943999285827381e-06,
960
+ "logits/chosen": -2.1309783458709717,
961
+ "logits/rejected": -2.133222818374634,
962
+ "logps/chosen": -21.91169548034668,
963
+ "logps/rejected": -23.928085327148438,
964
+ "loss": 0.6741,
965
+ "rewards/accuracies": 0.671875,
966
+ "rewards/chosen": -0.007841155864298344,
967
+ "rewards/margins": 0.03996167331933975,
968
+ "rewards/rejected": -0.04780282825231552,
969
+ "step": 64
970
+ },
971
+ {
972
+ "epoch": 0.5942857142857143,
973
+ "grad_norm": 0.6835947036743164,
974
+ "learning_rate": 1.0529517305559246e-06,
975
+ "logits/chosen": -2.1413941383361816,
976
+ "logits/rejected": -2.1449737548828125,
977
+ "logps/chosen": -19.570405960083008,
978
+ "logps/rejected": -23.024538040161133,
979
+ "loss": 0.6683,
980
+ "rewards/accuracies": 0.78125,
981
+ "rewards/chosen": -0.0007449511904269457,
982
+ "rewards/margins": 0.051878269761800766,
983
+ "rewards/rejected": -0.05262322351336479,
984
+ "step": 65
985
+ },
986
+ {
987
+ "epoch": 0.6034285714285714,
988
+ "grad_norm": 0.7086966633796692,
989
+ "learning_rate": 1.0118748719904738e-06,
990
+ "logits/chosen": -2.1314806938171387,
991
+ "logits/rejected": -2.132997512817383,
992
+ "logps/chosen": -22.395124435424805,
993
+ "logps/rejected": -24.68596839904785,
994
+ "loss": 0.6714,
995
+ "rewards/accuracies": 0.734375,
996
+ "rewards/chosen": -0.004053800366818905,
997
+ "rewards/margins": 0.045246005058288574,
998
+ "rewards/rejected": -0.049299806356430054,
999
+ "step": 66
1000
+ },
1001
+ {
1002
+ "epoch": 0.6125714285714285,
1003
+ "grad_norm": 0.7053619623184204,
1004
+ "learning_rate": 9.712034732701943e-07,
1005
+ "logits/chosen": -2.136747360229492,
1006
+ "logits/rejected": -2.1409151554107666,
1007
+ "logps/chosen": -19.340253829956055,
1008
+ "logps/rejected": -26.333112716674805,
1009
+ "loss": 0.6711,
1010
+ "rewards/accuracies": 0.734375,
1011
+ "rewards/chosen": -0.012234330177307129,
1012
+ "rewards/margins": 0.04644213989377022,
1013
+ "rewards/rejected": -0.05867646634578705,
1014
+ "step": 67
1015
+ },
1016
+ {
1017
+ "epoch": 0.6217142857142857,
1018
+ "grad_norm": 0.7142196893692017,
1019
+ "learning_rate": 9.309713179847465e-07,
1020
+ "logits/chosen": -2.1288576126098633,
1021
+ "logits/rejected": -2.132416009902954,
1022
+ "logps/chosen": -21.31295394897461,
1023
+ "logps/rejected": -24.50257682800293,
1024
+ "loss": 0.6644,
1025
+ "rewards/accuracies": 0.765625,
1026
+ "rewards/chosen": -0.0032868993002921343,
1027
+ "rewards/margins": 0.05996156856417656,
1028
+ "rewards/rejected": -0.06324847042560577,
1029
+ "step": 68
1030
+ },
1031
+ {
1032
+ "epoch": 0.6308571428571429,
1033
+ "grad_norm": 0.7126966714859009,
1034
+ "learning_rate": 8.912118248673967e-07,
1035
+ "logits/chosen": -2.118403434753418,
1036
+ "logits/rejected": -2.1224937438964844,
1037
+ "logps/chosen": -20.098617553710938,
1038
+ "logps/rejected": -24.383617401123047,
1039
+ "loss": 0.6529,
1040
+ "rewards/accuracies": 0.8125,
1041
+ "rewards/chosen": 0.013771215453743935,
1042
+ "rewards/margins": 0.08454546332359314,
1043
+ "rewards/rejected": -0.07077424228191376,
1044
+ "step": 69
1045
+ },
1046
+ {
1047
+ "epoch": 0.64,
1048
+ "grad_norm": 0.7309445142745972,
1049
+ "learning_rate": 8.519580200358309e-07,
1050
+ "logits/chosen": -2.1299290657043457,
1051
+ "logits/rejected": -2.1309103965759277,
1052
+ "logps/chosen": -19.261966705322266,
1053
+ "logps/rejected": -21.874065399169922,
1054
+ "loss": 0.6644,
1055
+ "rewards/accuracies": 0.765625,
1056
+ "rewards/chosen": -0.0036550310906022787,
1057
+ "rewards/margins": 0.06022891029715538,
1058
+ "rewards/rejected": -0.0638839453458786,
1059
+ "step": 70
1060
+ },
1061
+ {
1062
+ "epoch": 0.6491428571428571,
1063
+ "grad_norm": 0.6933106780052185,
1064
+ "learning_rate": 8.132425095591e-07,
1065
+ "logits/chosen": -2.126209259033203,
1066
+ "logits/rejected": -2.1274337768554688,
1067
+ "logps/chosen": -21.729415893554688,
1068
+ "logps/rejected": -23.67267417907715,
1069
+ "loss": 0.6612,
1070
+ "rewards/accuracies": 0.796875,
1071
+ "rewards/chosen": -0.0012235536705702543,
1072
+ "rewards/margins": 0.066755510866642,
1073
+ "rewards/rejected": -0.06797906756401062,
1074
+ "step": 71
1075
+ },
1076
+ {
1077
+ "epoch": 0.6582857142857143,
1078
+ "grad_norm": 0.6878139972686768,
1079
+ "learning_rate": 7.750974523734742e-07,
1080
+ "logits/chosen": -2.120508909225464,
1081
+ "logits/rejected": -2.1226325035095215,
1082
+ "logps/chosen": -19.833683013916016,
1083
+ "logps/rejected": -24.432552337646484,
1084
+ "loss": 0.6564,
1085
+ "rewards/accuracies": 0.78125,
1086
+ "rewards/chosen": -0.005440461914986372,
1087
+ "rewards/margins": 0.07887633144855499,
1088
+ "rewards/rejected": -0.08431679010391235,
1089
+ "step": 72
1090
+ },
1091
+ {
1092
+ "epoch": 0.6674285714285715,
1093
+ "grad_norm": 0.6848965883255005,
1094
+ "learning_rate": 7.375545335697085e-07,
1095
+ "logits/chosen": -2.1300716400146484,
1096
+ "logits/rejected": -2.1321635246276855,
1097
+ "logps/chosen": -20.999731063842773,
1098
+ "logps/rejected": -24.038249969482422,
1099
+ "loss": 0.662,
1100
+ "rewards/accuracies": 0.78125,
1101
+ "rewards/chosen": -0.003927034325897694,
1102
+ "rewards/margins": 0.06512106209993362,
1103
+ "rewards/rejected": -0.06904809176921844,
1104
+ "step": 73
1105
+ },
1106
+ {
1107
+ "epoch": 0.6765714285714286,
1108
+ "grad_norm": 0.7239253520965576,
1109
+ "learning_rate": 7.00644938073904e-07,
1110
+ "logits/chosen": -2.136348247528076,
1111
+ "logits/rejected": -2.137990951538086,
1112
+ "logps/chosen": -21.84921646118164,
1113
+ "logps/rejected": -25.642963409423828,
1114
+ "loss": 0.6528,
1115
+ "rewards/accuracies": 0.8125,
1116
+ "rewards/chosen": 0.001280774362385273,
1117
+ "rewards/margins": 0.08441222459077835,
1118
+ "rewards/rejected": -0.08313144743442535,
1119
+ "step": 74
1120
+ },
1121
+ {
1122
+ "epoch": 0.6857142857142857,
1123
+ "grad_norm": 0.6780479550361633,
1124
+ "learning_rate": 6.643993247438348e-07,
1125
+ "logits/chosen": -2.122738838195801,
1126
+ "logits/rejected": -2.127403497695923,
1127
+ "logps/chosen": -19.919052124023438,
1128
+ "logps/rejected": -23.570457458496094,
1129
+ "loss": 0.6607,
1130
+ "rewards/accuracies": 0.8125,
1131
+ "rewards/chosen": 0.0010643948335200548,
1132
+ "rewards/margins": 0.06831549108028412,
1133
+ "rewards/rejected": -0.06725109368562698,
1134
+ "step": 75
1135
+ },
1136
+ {
1137
+ "epoch": 0.6948571428571428,
1138
+ "grad_norm": 0.7106800675392151,
1139
+ "learning_rate": 6.288478009022447e-07,
1140
+ "logits/chosen": -2.1340596675872803,
1141
+ "logits/rejected": -2.1375958919525146,
1142
+ "logps/chosen": -20.532428741455078,
1143
+ "logps/rejected": -23.923845291137695,
1144
+ "loss": 0.6577,
1145
+ "rewards/accuracies": 0.84375,
1146
+ "rewards/chosen": 0.0032222855370491743,
1147
+ "rewards/margins": 0.07417334616184235,
1148
+ "rewards/rejected": -0.07095105946063995,
1149
+ "step": 76
1150
+ },
1151
+ {
1152
+ "epoch": 0.704,
1153
+ "grad_norm": 0.6871652007102966,
1154
+ "learning_rate": 5.940198973282838e-07,
1155
+ "logits/chosen": -2.1262307167053223,
1156
+ "logits/rejected": -2.131108283996582,
1157
+ "logps/chosen": -18.413406372070312,
1158
+ "logps/rejected": -23.341732025146484,
1159
+ "loss": 0.6645,
1160
+ "rewards/accuracies": 0.6875,
1161
+ "rewards/chosen": -0.01665385626256466,
1162
+ "rewards/margins": 0.060584962368011475,
1163
+ "rewards/rejected": -0.07723881304264069,
1164
+ "step": 77
1165
+ },
1166
+ {
1167
+ "epoch": 0.7131428571428572,
1168
+ "grad_norm": 0.6611953973770142,
1169
+ "learning_rate": 5.599445437278412e-07,
1170
+ "logits/chosen": -2.135463237762451,
1171
+ "logits/rejected": -2.1379756927490234,
1172
+ "logps/chosen": -18.5693359375,
1173
+ "logps/rejected": -21.502685546875,
1174
+ "loss": 0.6567,
1175
+ "rewards/accuracies": 0.765625,
1176
+ "rewards/chosen": 0.005435650702565908,
1177
+ "rewards/margins": 0.0763789713382721,
1178
+ "rewards/rejected": -0.07094332575798035,
1179
+ "step": 78
1180
+ },
1181
+ {
1182
+ "epoch": 0.7222857142857143,
1183
+ "grad_norm": 0.7486832141876221,
1184
+ "learning_rate": 5.266500447031646e-07,
1185
+ "logits/chosen": -2.1247940063476562,
1186
+ "logits/rejected": -2.122842788696289,
1187
+ "logps/chosen": -21.570556640625,
1188
+ "logps/rejected": -21.164236068725586,
1189
+ "loss": 0.6655,
1190
+ "rewards/accuracies": 0.78125,
1191
+ "rewards/chosen": -0.01216865424066782,
1192
+ "rewards/margins": 0.05833979696035385,
1193
+ "rewards/rejected": -0.0705084502696991,
1194
+ "step": 79
1195
+ },
1196
+ {
1197
+ "epoch": 0.7314285714285714,
1198
+ "grad_norm": 0.6928249001502991,
1199
+ "learning_rate": 4.941640562417138e-07,
1200
+ "logits/chosen": -2.1150875091552734,
1201
+ "logits/rejected": -2.1165449619293213,
1202
+ "logps/chosen": -21.510713577270508,
1203
+ "logps/rejected": -24.29326057434082,
1204
+ "loss": 0.657,
1205
+ "rewards/accuracies": 0.8125,
1206
+ "rewards/chosen": -0.006187473423779011,
1207
+ "rewards/margins": 0.07648099958896637,
1208
+ "rewards/rejected": -0.0826684832572937,
1209
+ "step": 80
1210
+ },
1211
+ {
1212
+ "epoch": 0.7405714285714285,
1213
+ "grad_norm": 0.7375283241271973,
1214
+ "learning_rate": 4.6251356274379226e-07,
1215
+ "logits/chosen": -2.1273298263549805,
1216
+ "logits/rejected": -2.129077434539795,
1217
+ "logps/chosen": -22.487407684326172,
1218
+ "logps/rejected": -24.35370635986328,
1219
+ "loss": 0.6587,
1220
+ "rewards/accuracies": 0.796875,
1221
+ "rewards/chosen": -0.004263547249138355,
1222
+ "rewards/margins": 0.0724744200706482,
1223
+ "rewards/rejected": -0.07673796266317368,
1224
+ "step": 81
1225
+ },
1226
+ {
1227
+ "epoch": 0.7497142857142857,
1228
+ "grad_norm": 0.7093409299850464,
1229
+ "learning_rate": 4.317248546080218e-07,
1230
+ "logits/chosen": -2.1207175254821777,
1231
+ "logits/rejected": -2.124617099761963,
1232
+ "logps/chosen": -19.91944694519043,
1233
+ "logps/rejected": -25.45476722717285,
1234
+ "loss": 0.6658,
1235
+ "rewards/accuracies": 0.75,
1236
+ "rewards/chosen": -0.011447591707110405,
1237
+ "rewards/margins": 0.05775396525859833,
1238
+ "rewards/rejected": -0.06920155882835388,
1239
+ "step": 82
1240
+ },
1241
+ {
1242
+ "epoch": 0.7588571428571429,
1243
+ "grad_norm": 0.659227728843689,
1244
+ "learning_rate": 4.018235063932971e-07,
1245
+ "logits/chosen": -2.129696846008301,
1246
+ "logits/rejected": -2.1302928924560547,
1247
+ "logps/chosen": -19.911724090576172,
1248
+ "logps/rejected": -21.486099243164062,
1249
+ "loss": 0.6629,
1250
+ "rewards/accuracies": 0.6875,
1251
+ "rewards/chosen": -0.015592245385050774,
1252
+ "rewards/margins": 0.0647309422492981,
1253
+ "rewards/rejected": -0.08032318949699402,
1254
+ "step": 83
1255
+ },
1256
+ {
1257
+ "epoch": 0.768,
1258
+ "grad_norm": 0.6856977343559265,
1259
+ "learning_rate": 3.7283435557534184e-07,
1260
+ "logits/chosen": -2.115324020385742,
1261
+ "logits/rejected": -2.118356704711914,
1262
+ "logps/chosen": -20.232105255126953,
1263
+ "logps/rejected": -25.640562057495117,
1264
+ "loss": 0.6603,
1265
+ "rewards/accuracies": 0.796875,
1266
+ "rewards/chosen": -0.0029051026795059443,
1267
+ "rewards/margins": 0.07027439773082733,
1268
+ "rewards/rejected": -0.07317950576543808,
1269
+ "step": 84
1270
+ },
1271
+ {
1272
+ "epoch": 0.7771428571428571,
1273
+ "grad_norm": 0.9134950637817383,
1274
+ "learning_rate": 3.447814819155292e-07,
1275
+ "logits/chosen": -2.1142709255218506,
1276
+ "logits/rejected": -2.1176223754882812,
1277
+ "logps/chosen": -22.407655715942383,
1278
+ "logps/rejected": -25.338939666748047,
1279
+ "loss": 0.6606,
1280
+ "rewards/accuracies": 0.765625,
1281
+ "rewards/chosen": -0.003580818185582757,
1282
+ "rewards/margins": 0.06903493404388428,
1283
+ "rewards/rejected": -0.07261575758457184,
1284
+ "step": 85
1285
+ },
1286
+ {
1287
+ "epoch": 0.7862857142857143,
1288
+ "grad_norm": 0.8176380395889282,
1289
+ "learning_rate": 3.1768818745908876e-07,
1290
+ "logits/chosen": -2.128533363342285,
1291
+ "logits/rejected": -2.130959987640381,
1292
+ "logps/chosen": -21.104782104492188,
1293
+ "logps/rejected": -23.96923065185547,
1294
+ "loss": 0.6615,
1295
+ "rewards/accuracies": 0.734375,
1296
+ "rewards/chosen": -0.01085490919649601,
1297
+ "rewards/margins": 0.06671939790248871,
1298
+ "rewards/rejected": -0.07757431268692017,
1299
+ "step": 86
1300
+ },
1301
+ {
1302
+ "epoch": 0.7954285714285714,
1303
+ "grad_norm": 0.7210907340049744,
1304
+ "learning_rate": 2.915769771793256e-07,
1305
+ "logits/chosen": -2.115241050720215,
1306
+ "logits/rejected": -2.1181540489196777,
1307
+ "logps/chosen": -19.796510696411133,
1308
+ "logps/rejected": -24.347612380981445,
1309
+ "loss": 0.6599,
1310
+ "rewards/accuracies": 0.71875,
1311
+ "rewards/chosen": 0.0054216571152210236,
1312
+ "rewards/margins": 0.07189285755157471,
1313
+ "rewards/rejected": -0.06647119671106339,
1314
+ "step": 87
1315
+ },
1316
+ {
1317
+ "epoch": 0.8045714285714286,
1318
+ "grad_norm": 0.7129194140434265,
1319
+ "learning_rate": 2.6646954028392375e-07,
1320
+ "logits/chosen": -2.1180641651153564,
1321
+ "logits/rejected": -2.118509531021118,
1322
+ "logps/chosen": -21.633447647094727,
1323
+ "logps/rejected": -24.8947811126709,
1324
+ "loss": 0.6714,
1325
+ "rewards/accuracies": 0.703125,
1326
+ "rewards/chosen": -0.018895957618951797,
1327
+ "rewards/margins": 0.04705498740077019,
1328
+ "rewards/rejected": -0.06595094501972198,
1329
+ "step": 88
1330
+ },
1331
+ {
1332
+ "epoch": 0.8137142857142857,
1333
+ "grad_norm": 0.751731276512146,
1334
+ "learning_rate": 2.4238673219886385e-07,
1335
+ "logits/chosen": -2.1097123622894287,
1336
+ "logits/rejected": -2.1142430305480957,
1337
+ "logps/chosen": -21.052400588989258,
1338
+ "logps/rejected": -25.112659454345703,
1339
+ "loss": 0.6497,
1340
+ "rewards/accuracies": 0.859375,
1341
+ "rewards/chosen": 0.006454586982727051,
1342
+ "rewards/margins": 0.09120012819766998,
1343
+ "rewards/rejected": -0.08474554121494293,
1344
+ "step": 89
1345
+ },
1346
+ {
1347
+ "epoch": 0.8228571428571428,
1348
+ "grad_norm": 0.7012993693351746,
1349
+ "learning_rate": 2.1934855724491708e-07,
1350
+ "logits/chosen": -2.108950138092041,
1351
+ "logits/rejected": -2.1133596897125244,
1352
+ "logps/chosen": -20.4837646484375,
1353
+ "logps/rejected": -24.65631866455078,
1354
+ "loss": 0.6582,
1355
+ "rewards/accuracies": 0.8125,
1356
+ "rewards/chosen": -0.00821665208786726,
1357
+ "rewards/margins": 0.07355347275733948,
1358
+ "rewards/rejected": -0.08177012950181961,
1359
+ "step": 90
1360
+ },
1361
+ {
1362
+ "epoch": 0.832,
1363
+ "grad_norm": 0.7529146075248718,
1364
+ "learning_rate": 1.9737415202111148e-07,
1365
+ "logits/chosen": -2.118992805480957,
1366
+ "logits/rejected": -2.1215620040893555,
1367
+ "logps/chosen": -21.363391876220703,
1368
+ "logps/rejected": -26.736839294433594,
1369
+ "loss": 0.6614,
1370
+ "rewards/accuracies": 0.765625,
1371
+ "rewards/chosen": -0.017014339566230774,
1372
+ "rewards/margins": 0.06742921471595764,
1373
+ "rewards/rejected": -0.08444354683160782,
1374
+ "step": 91
1375
+ },
1376
+ {
1377
+ "epoch": 0.8411428571428572,
1378
+ "grad_norm": 0.6690794229507446,
1379
+ "learning_rate": 1.764817695089636e-07,
1380
+ "logits/chosen": -2.1333415508270264,
1381
+ "logits/rejected": -2.1368355751037598,
1382
+ "logps/chosen": -18.803630828857422,
1383
+ "logps/rejected": -24.878826141357422,
1384
+ "loss": 0.6552,
1385
+ "rewards/accuracies": 0.78125,
1386
+ "rewards/chosen": 0.0014820651849731803,
1387
+ "rewards/margins": 0.08085457980632782,
1388
+ "rewards/rejected": -0.07937251776456833,
1389
+ "step": 92
1390
+ },
1391
+ {
1392
+ "epoch": 0.8502857142857143,
1393
+ "grad_norm": 0.6794025301933289,
1394
+ "learning_rate": 1.566887639106911e-07,
1395
+ "logits/chosen": -2.12078595161438,
1396
+ "logits/rejected": -2.1245594024658203,
1397
+ "logps/chosen": -20.484092712402344,
1398
+ "logps/rejected": -24.03988265991211,
1399
+ "loss": 0.6592,
1400
+ "rewards/accuracies": 0.703125,
1401
+ "rewards/chosen": -0.0017876154743134975,
1402
+ "rewards/margins": 0.07191066443920135,
1403
+ "rewards/rejected": -0.07369828224182129,
1404
+ "step": 93
1405
+ },
1406
+ {
1407
+ "epoch": 0.8594285714285714,
1408
+ "grad_norm": 0.6425016522407532,
1409
+ "learning_rate": 1.380115762339877e-07,
1410
+ "logits/chosen": -2.126593589782715,
1411
+ "logits/rejected": -2.125735282897949,
1412
+ "logps/chosen": -19.677227020263672,
1413
+ "logps/rejected": -20.280244827270508,
1414
+ "loss": 0.6657,
1415
+ "rewards/accuracies": 0.796875,
1416
+ "rewards/chosen": -0.014084220863878727,
1417
+ "rewards/margins": 0.05799565464258194,
1418
+ "rewards/rejected": -0.07207988202571869,
1419
+ "step": 94
1420
+ },
1421
+ {
1422
+ "epoch": 0.8685714285714285,
1423
+ "grad_norm": 0.7096425294876099,
1424
+ "learning_rate": 1.204657206353459e-07,
1425
+ "logits/chosen": -2.1364822387695312,
1426
+ "logits/rejected": -2.1406309604644775,
1427
+ "logps/chosen": -19.647085189819336,
1428
+ "logps/rejected": -25.55003547668457,
1429
+ "loss": 0.6538,
1430
+ "rewards/accuracies": 0.84375,
1431
+ "rewards/chosen": 0.0021409899927675724,
1432
+ "rewards/margins": 0.0829853042960167,
1433
+ "rewards/rejected": -0.08084432035684586,
1434
+ "step": 95
1435
+ },
1436
+ {
1437
+ "epoch": 0.8777142857142857,
1438
+ "grad_norm": 0.806024968624115,
1439
+ "learning_rate": 1.0406577153326192e-07,
1440
+ "logits/chosen": -2.12524676322937,
1441
+ "logits/rejected": -2.128009557723999,
1442
+ "logps/chosen": -21.744781494140625,
1443
+ "logps/rejected": -24.876564025878906,
1444
+ "loss": 0.664,
1445
+ "rewards/accuracies": 0.796875,
1446
+ "rewards/chosen": -0.014198469929397106,
1447
+ "rewards/margins": 0.060674287378787994,
1448
+ "rewards/rejected": -0.07487276196479797,
1449
+ "step": 96
1450
+ },
1451
+ {
1452
+ "epoch": 0.8868571428571429,
1453
+ "grad_norm": 0.6857902407646179,
1454
+ "learning_rate": 8.882535150203569e-08,
1455
+ "logits/chosen": -2.128683567047119,
1456
+ "logits/rejected": -2.1316354274749756,
1457
+ "logps/chosen": -18.787628173828125,
1458
+ "logps/rejected": -23.368488311767578,
1459
+ "loss": 0.6702,
1460
+ "rewards/accuracies": 0.703125,
1461
+ "rewards/chosen": -0.024203235283493996,
1462
+ "rewards/margins": 0.049591515213251114,
1463
+ "rewards/rejected": -0.07379475235939026,
1464
+ "step": 97
1465
+ },
1466
+ {
1467
+ "epoch": 0.896,
1468
+ "grad_norm": 0.6921999454498291,
1469
+ "learning_rate": 7.475711995621387e-08,
1470
+ "logits/chosen": -2.1213159561157227,
1471
+ "logits/rejected": -2.1230628490448,
1472
+ "logps/chosen": -21.5747127532959,
1473
+ "logps/rejected": -24.342790603637695,
1474
+ "loss": 0.6603,
1475
+ "rewards/accuracies": 0.75,
1476
+ "rewards/chosen": -0.022949904203414917,
1477
+ "rewards/margins": 0.0700790211558342,
1478
+ "rewards/rejected": -0.09302891790866852,
1479
+ "step": 98
1480
+ },
1481
+ {
1482
+ "epoch": 0.9051428571428571,
1483
+ "grad_norm": 0.7130371928215027,
1484
+ "learning_rate": 6.187276263508168e-08,
1485
+ "logits/chosen": -2.1171250343322754,
1486
+ "logits/rejected": -2.119697093963623,
1487
+ "logps/chosen": -21.54928970336914,
1488
+ "logps/rejected": -25.877193450927734,
1489
+ "loss": 0.6673,
1490
+ "rewards/accuracies": 0.71875,
1491
+ "rewards/chosen": -0.009239297360181808,
1492
+ "rewards/margins": 0.055657997727394104,
1493
+ "rewards/rejected": -0.06489729136228561,
1494
+ "step": 99
1495
+ },
1496
+ {
1497
+ "epoch": 0.9142857142857143,
1498
+ "grad_norm": 0.7120369076728821,
1499
+ "learning_rate": 5.018298189593368e-08,
1500
+ "logits/chosen": -2.140258312225342,
1501
+ "logits/rejected": -2.1442980766296387,
1502
+ "logps/chosen": -20.42232322692871,
1503
+ "logps/rejected": -25.16228485107422,
1504
+ "loss": 0.6531,
1505
+ "rewards/accuracies": 0.78125,
1506
+ "rewards/chosen": 0.0013755280524492264,
1507
+ "rewards/margins": 0.08448025584220886,
1508
+ "rewards/rejected": -0.08310472220182419,
1509
+ "step": 100
1510
+ },
1511
+ {
1512
+ "epoch": 0.9234285714285714,
1513
+ "grad_norm": 0.6894267797470093,
1514
+ "learning_rate": 3.969748782418991e-08,
1515
+ "logits/chosen": -2.140516757965088,
1516
+ "logits/rejected": -2.143148899078369,
1517
+ "logps/chosen": -20.96661376953125,
1518
+ "logps/rejected": -24.055721282958984,
1519
+ "loss": 0.6593,
1520
+ "rewards/accuracies": 0.796875,
1521
+ "rewards/chosen": -0.004207253456115723,
1522
+ "rewards/margins": 0.07119009643793106,
1523
+ "rewards/rejected": -0.07539734244346619,
1524
+ "step": 101
1525
+ },
1526
+ {
1527
+ "epoch": 0.9325714285714286,
1528
+ "grad_norm": 0.7290534377098083,
1529
+ "learning_rate": 3.042499016773881e-08,
1530
+ "logits/chosen": -2.132014751434326,
1531
+ "logits/rejected": -2.1332242488861084,
1532
+ "logps/chosen": -19.451780319213867,
1533
+ "logps/rejected": -21.663698196411133,
1534
+ "loss": 0.6617,
1535
+ "rewards/accuracies": 0.765625,
1536
+ "rewards/chosen": -0.00491691380739212,
1537
+ "rewards/margins": 0.06624950468540192,
1538
+ "rewards/rejected": -0.07116641849279404,
1539
+ "step": 102
1540
+ },
1541
+ {
1542
+ "epoch": 0.9417142857142857,
1543
+ "grad_norm": 0.7135753035545349,
1544
+ "learning_rate": 2.2373191102207647e-08,
1545
+ "logits/chosen": -2.1299057006835938,
1546
+ "logits/rejected": -2.1332318782806396,
1547
+ "logps/chosen": -19.843944549560547,
1548
+ "logps/rejected": -24.937877655029297,
1549
+ "loss": 0.6494,
1550
+ "rewards/accuracies": 0.8125,
1551
+ "rewards/chosen": 0.0014770530397072434,
1552
+ "rewards/margins": 0.09241947531700134,
1553
+ "rewards/rejected": -0.09094242751598358,
1554
+ "step": 103
1555
+ },
1556
+ {
1557
+ "epoch": 0.9508571428571428,
1558
+ "grad_norm": 0.6818587183952332,
1559
+ "learning_rate": 1.5548778833171463e-08,
1560
+ "logits/chosen": -2.130626916885376,
1561
+ "logits/rejected": -2.132134437561035,
1562
+ "logps/chosen": -21.063608169555664,
1563
+ "logps/rejected": -22.76825714111328,
1564
+ "loss": 0.6539,
1565
+ "rewards/accuracies": 0.828125,
1566
+ "rewards/chosen": -0.0026874844916164875,
1567
+ "rewards/margins": 0.0823042243719101,
1568
+ "rewards/rejected": -0.0849917083978653,
1569
+ "step": 104
1570
+ },
1571
+ {
1572
+ "epoch": 0.96,
1573
+ "grad_norm": 0.6808786392211914,
1574
+ "learning_rate": 9.957422040612507e-09,
1575
+ "logits/chosen": -2.117967128753662,
1576
+ "logits/rejected": -2.1230428218841553,
1577
+ "logps/chosen": -19.646337509155273,
1578
+ "logps/rejected": -24.299863815307617,
1579
+ "loss": 0.6662,
1580
+ "rewards/accuracies": 0.71875,
1581
+ "rewards/chosen": -0.016201000660657883,
1582
+ "rewards/margins": 0.05751265585422516,
1583
+ "rewards/rejected": -0.07371365278959274,
1584
+ "step": 105
1585
+ },
1586
+ {
1587
+ "epoch": 0.9691428571428572,
1588
+ "grad_norm": 0.6646621227264404,
1589
+ "learning_rate": 5.6037651702463e-09,
1590
+ "logits/chosen": -2.1203553676605225,
1591
+ "logits/rejected": -2.1216330528259277,
1592
+ "logps/chosen": -20.876747131347656,
1593
+ "logps/rejected": -23.389541625976562,
1594
+ "loss": 0.661,
1595
+ "rewards/accuracies": 0.78125,
1596
+ "rewards/chosen": -0.013487475924193859,
1597
+ "rewards/margins": 0.06747360527515411,
1598
+ "rewards/rejected": -0.08096107840538025,
1599
+ "step": 106
1600
+ },
1601
+ {
1602
+ "epoch": 0.9782857142857143,
1603
+ "grad_norm": 0.651802659034729,
1604
+ "learning_rate": 2.491424575625123e-09,
1605
+ "logits/chosen": -2.1184940338134766,
1606
+ "logits/rejected": -2.1203343868255615,
1607
+ "logps/chosen": -19.80478858947754,
1608
+ "logps/rejected": -23.504154205322266,
1609
+ "loss": 0.6654,
1610
+ "rewards/accuracies": 0.796875,
1611
+ "rewards/chosen": -0.012541299685835838,
1612
+ "rewards/margins": 0.058582596480846405,
1613
+ "rewards/rejected": -0.07112389802932739,
1614
+ "step": 107
1615
+ },
1616
+ {
1617
+ "epoch": 0.9874285714285714,
1618
+ "grad_norm": 0.6968909502029419,
1619
+ "learning_rate": 6.229855142232399e-10,
1620
+ "logits/chosen": -2.1146626472473145,
1621
+ "logits/rejected": -2.1161539554595947,
1622
+ "logps/chosen": -19.548887252807617,
1623
+ "logps/rejected": -22.608421325683594,
1624
+ "loss": 0.6522,
1625
+ "rewards/accuracies": 0.8125,
1626
+ "rewards/chosen": -0.004665360786020756,
1627
+ "rewards/margins": 0.08596684038639069,
1628
+ "rewards/rejected": -0.09063220024108887,
1629
+ "step": 108
1630
+ },
1631
+ {
1632
+ "epoch": 0.9965714285714286,
1633
+ "grad_norm": 0.7107558250427246,
1634
+ "learning_rate": 0.0,
1635
+ "logits/chosen": -2.1318564414978027,
1636
+ "logits/rejected": -2.134704351425171,
1637
+ "logps/chosen": -21.569801330566406,
1638
+ "logps/rejected": -25.938987731933594,
1639
+ "loss": 0.6582,
1640
+ "rewards/accuracies": 0.796875,
1641
+ "rewards/chosen": -0.003806858789175749,
1642
+ "rewards/margins": 0.07306241989135742,
1643
+ "rewards/rejected": -0.07686927914619446,
1644
+ "step": 109
1645
+ },
1646
+ {
1647
+ "epoch": 0.9965714285714286,
1648
+ "step": 109,
1649
+ "total_flos": 2.557771887987917e+18,
1650
+ "train_loss": 0.6735316744638146,
1651
+ "train_runtime": 12905.5168,
1652
+ "train_samples_per_second": 0.542,
1653
+ "train_steps_per_second": 0.008
1654
+ }
1655
+ ],
1656
+ "logging_steps": 1.0,
1657
+ "max_steps": 109,
1658
+ "num_input_tokens_seen": 0,
1659
+ "num_train_epochs": 1,
1660
+ "save_steps": 500,
1661
+ "stateful_callbacks": {
1662
+ "TrainerControl": {
1663
+ "args": {
1664
+ "should_epoch_stop": false,
1665
+ "should_evaluate": false,
1666
+ "should_log": false,
1667
+ "should_save": true,
1668
+ "should_training_stop": true
1669
+ },
1670
+ "attributes": {}
1671
+ }
1672
+ },
1673
+ "total_flos": 2.557771887987917e+18,
1674
+ "train_batch_size": 1,
1675
+ "trial_name": null,
1676
+ "trial_params": null
1677
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9f2ae178121a70be9448d8bd7ef9d021893fc98c30287e563160263428823b
3
+ size 7224