Billyyy commited on
Commit
fdb51cf
·
verified ·
1 Parent(s): f9237e5

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b19e11f447e7808b4485d558ef1154ac18739d90a3ca661e880731345e57dc3c
3
  size 2718107304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbd5aa58a02999ada4c0f3cd43ca41dde21582eb2d5dc255f13cbc8023650b6
3
  size 2718107304
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f760c332433332f0c407af5847f2519c9fc015ef3d9c6352bf66a977a2ee25dd
3
  size 145486330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d7ccc1eca1a7ceff3e0dde210062328c431e027749f3f43c26d6230e844198d
3
  size 145486330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cb0ef8513b88a13107327413608c21fb3fd9739eade34e1dd90f5265fc015f0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5b09ae3a06fa6068073a53264cd1c287209ec69eb82f509ea260660e6955ead
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:685957d6186c9595a838f507ad76cb3024cdfde7a3a70e0ca3050d9e9db2c6f6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48993fe83df0cc4ae4d85d5c3e846862f5ffd116f5d7e38ed8352e362fdf4bbb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7281995266703076,
5
  "eval_steps": 1000,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2839,6 +2839,356 @@
2839
  "eval_samples_per_second": 9.649,
2840
  "eval_steps_per_second": 1.206,
2841
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2842
  }
2843
  ],
2844
  "logging_steps": 10,
@@ -2858,7 +3208,7 @@
2858
  "attributes": {}
2859
  }
2860
  },
2861
- "total_flos": 9.35257863880704e+17,
2862
  "train_batch_size": 4,
2863
  "trial_name": null,
2864
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8192244675040962,
5
  "eval_steps": 1000,
6
+ "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2839
  "eval_samples_per_second": 9.649,
2840
  "eval_steps_per_second": 1.206,
2841
  "step": 4000
2842
+ },
2843
+ {
2844
+ "epoch": 0.7300200254869834,
2845
+ "grad_norm": 5.740320205688477,
2846
+ "learning_rate": 2.0232804874534313e-05,
2847
+ "loss": 2.4582,
2848
+ "step": 4010
2849
+ },
2850
+ {
2851
+ "epoch": 0.7318405243036592,
2852
+ "grad_norm": 5.987521171569824,
2853
+ "learning_rate": 1.998062403243704e-05,
2854
+ "loss": 2.4499,
2855
+ "step": 4020
2856
+ },
2857
+ {
2858
+ "epoch": 0.733661023120335,
2859
+ "grad_norm": 5.337474346160889,
2860
+ "learning_rate": 1.9729631629190042e-05,
2861
+ "loss": 2.4598,
2862
+ "step": 4030
2863
+ },
2864
+ {
2865
+ "epoch": 0.7354815219370108,
2866
+ "grad_norm": 5.655992031097412,
2867
+ "learning_rate": 1.9479837601346457e-05,
2868
+ "loss": 2.4601,
2869
+ "step": 4040
2870
+ },
2871
+ {
2872
+ "epoch": 0.7373020207536866,
2873
+ "grad_norm": 5.7331414222717285,
2874
+ "learning_rate": 1.923125183801678e-05,
2875
+ "loss": 2.4445,
2876
+ "step": 4050
2877
+ },
2878
+ {
2879
+ "epoch": 0.7391225195703622,
2880
+ "grad_norm": 5.471503257751465,
2881
+ "learning_rate": 1.898388418047753e-05,
2882
+ "loss": 2.4683,
2883
+ "step": 4060
2884
+ },
2885
+ {
2886
+ "epoch": 0.740943018387038,
2887
+ "grad_norm": 5.409184455871582,
2888
+ "learning_rate": 1.87377444217815e-05,
2889
+ "loss": 2.4358,
2890
+ "step": 4070
2891
+ },
2892
+ {
2893
+ "epoch": 0.7427635172037138,
2894
+ "grad_norm": 5.579779148101807,
2895
+ "learning_rate": 1.8492842306370182e-05,
2896
+ "loss": 2.4989,
2897
+ "step": 4080
2898
+ },
2899
+ {
2900
+ "epoch": 0.7445840160203896,
2901
+ "grad_norm": 5.366626262664795,
2902
+ "learning_rate": 1.8249187529687895e-05,
2903
+ "loss": 2.4102,
2904
+ "step": 4090
2905
+ },
2906
+ {
2907
+ "epoch": 0.7464045148370654,
2908
+ "grad_norm": 5.263418674468994,
2909
+ "learning_rate": 1.8006789737797984e-05,
2910
+ "loss": 2.4573,
2911
+ "step": 4100
2912
+ },
2913
+ {
2914
+ "epoch": 0.7482250136537412,
2915
+ "grad_norm": 5.129177570343018,
2916
+ "learning_rate": 1.7765658527000966e-05,
2917
+ "loss": 2.4792,
2918
+ "step": 4110
2919
+ },
2920
+ {
2921
+ "epoch": 0.7500455124704168,
2922
+ "grad_norm": 6.237401962280273,
2923
+ "learning_rate": 1.7525803443454615e-05,
2924
+ "loss": 2.479,
2925
+ "step": 4120
2926
+ },
2927
+ {
2928
+ "epoch": 0.7518660112870926,
2929
+ "grad_norm": 6.163425445556641,
2930
+ "learning_rate": 1.728723398279603e-05,
2931
+ "loss": 2.4222,
2932
+ "step": 4130
2933
+ },
2934
+ {
2935
+ "epoch": 0.7536865101037684,
2936
+ "grad_norm": 5.254932403564453,
2937
+ "learning_rate": 1.7049959589765686e-05,
2938
+ "loss": 2.4307,
2939
+ "step": 4140
2940
+ },
2941
+ {
2942
+ "epoch": 0.7555070089204442,
2943
+ "grad_norm": 6.144068717956543,
2944
+ "learning_rate": 1.6813989657833534e-05,
2945
+ "loss": 2.4923,
2946
+ "step": 4150
2947
+ },
2948
+ {
2949
+ "epoch": 0.75732750773712,
2950
+ "grad_norm": 5.038397789001465,
2951
+ "learning_rate": 1.6579333528827205e-05,
2952
+ "loss": 2.4732,
2953
+ "step": 4160
2954
+ },
2955
+ {
2956
+ "epoch": 0.7591480065537958,
2957
+ "grad_norm": 5.2848076820373535,
2958
+ "learning_rate": 1.634600049256204e-05,
2959
+ "loss": 2.4651,
2960
+ "step": 4170
2961
+ },
2962
+ {
2963
+ "epoch": 0.7609685053704716,
2964
+ "grad_norm": 5.274468898773193,
2965
+ "learning_rate": 1.611399978647342e-05,
2966
+ "loss": 2.4407,
2967
+ "step": 4180
2968
+ },
2969
+ {
2970
+ "epoch": 0.7627890041871472,
2971
+ "grad_norm": 5.039272308349609,
2972
+ "learning_rate": 1.588334059525099e-05,
2973
+ "loss": 2.4892,
2974
+ "step": 4190
2975
+ },
2976
+ {
2977
+ "epoch": 0.764609503003823,
2978
+ "grad_norm": 5.7419867515563965,
2979
+ "learning_rate": 1.5654032050475138e-05,
2980
+ "loss": 2.4456,
2981
+ "step": 4200
2982
+ },
2983
+ {
2984
+ "epoch": 0.7664300018204988,
2985
+ "grad_norm": 5.30146598815918,
2986
+ "learning_rate": 1.5426083230255405e-05,
2987
+ "loss": 2.4515,
2988
+ "step": 4210
2989
+ },
2990
+ {
2991
+ "epoch": 0.7682505006371746,
2992
+ "grad_norm": 4.977199077606201,
2993
+ "learning_rate": 1.5199503158871115e-05,
2994
+ "loss": 2.4317,
2995
+ "step": 4220
2996
+ },
2997
+ {
2998
+ "epoch": 0.7700709994538504,
2999
+ "grad_norm": 5.318095684051514,
3000
+ "learning_rate": 1.4974300806414082e-05,
3001
+ "loss": 2.403,
3002
+ "step": 4230
3003
+ },
3004
+ {
3005
+ "epoch": 0.7718914982705262,
3006
+ "grad_norm": 5.638497352600098,
3007
+ "learning_rate": 1.4750485088433592e-05,
3008
+ "loss": 2.4327,
3009
+ "step": 4240
3010
+ },
3011
+ {
3012
+ "epoch": 0.7737119970872018,
3013
+ "grad_norm": 5.739340305328369,
3014
+ "learning_rate": 1.4528064865583301e-05,
3015
+ "loss": 2.4266,
3016
+ "step": 4250
3017
+ },
3018
+ {
3019
+ "epoch": 0.7755324959038776,
3020
+ "grad_norm": 4.749205112457275,
3021
+ "learning_rate": 1.4307048943270606e-05,
3022
+ "loss": 2.4136,
3023
+ "step": 4260
3024
+ },
3025
+ {
3026
+ "epoch": 0.7773529947205534,
3027
+ "grad_norm": 5.616302490234375,
3028
+ "learning_rate": 1.4087446071307903e-05,
3029
+ "loss": 2.4197,
3030
+ "step": 4270
3031
+ },
3032
+ {
3033
+ "epoch": 0.7791734935372292,
3034
+ "grad_norm": 5.402510643005371,
3035
+ "learning_rate": 1.3869264943566263e-05,
3036
+ "loss": 2.4194,
3037
+ "step": 4280
3038
+ },
3039
+ {
3040
+ "epoch": 0.780993992353905,
3041
+ "grad_norm": 5.278769493103027,
3042
+ "learning_rate": 1.3652514197631277e-05,
3043
+ "loss": 2.4351,
3044
+ "step": 4290
3045
+ },
3046
+ {
3047
+ "epoch": 0.7828144911705808,
3048
+ "grad_norm": 6.828596115112305,
3049
+ "learning_rate": 1.343720241446103e-05,
3050
+ "loss": 2.3813,
3051
+ "step": 4300
3052
+ },
3053
+ {
3054
+ "epoch": 0.7846349899872566,
3055
+ "grad_norm": 5.306332588195801,
3056
+ "learning_rate": 1.322333811804643e-05,
3057
+ "loss": 2.4133,
3058
+ "step": 4310
3059
+ },
3060
+ {
3061
+ "epoch": 0.7864554888039322,
3062
+ "grad_norm": 5.437227249145508,
3063
+ "learning_rate": 1.3010929775073765e-05,
3064
+ "loss": 2.4166,
3065
+ "step": 4320
3066
+ },
3067
+ {
3068
+ "epoch": 0.788275987620608,
3069
+ "grad_norm": 5.493254661560059,
3070
+ "learning_rate": 1.2799985794589497e-05,
3071
+ "loss": 2.3842,
3072
+ "step": 4330
3073
+ },
3074
+ {
3075
+ "epoch": 0.7900964864372838,
3076
+ "grad_norm": 5.259057521820068,
3077
+ "learning_rate": 1.2590514527667336e-05,
3078
+ "loss": 2.3783,
3079
+ "step": 4340
3080
+ },
3081
+ {
3082
+ "epoch": 0.7919169852539596,
3083
+ "grad_norm": 5.750987529754639,
3084
+ "learning_rate": 1.2382524267077645e-05,
3085
+ "loss": 2.4202,
3086
+ "step": 4350
3087
+ },
3088
+ {
3089
+ "epoch": 0.7937374840706354,
3090
+ "grad_norm": 4.952456951141357,
3091
+ "learning_rate": 1.2176023246959133e-05,
3092
+ "loss": 2.4393,
3093
+ "step": 4360
3094
+ },
3095
+ {
3096
+ "epoch": 0.7955579828873112,
3097
+ "grad_norm": 5.3008713722229,
3098
+ "learning_rate": 1.1971019642492942e-05,
3099
+ "loss": 2.375,
3100
+ "step": 4370
3101
+ },
3102
+ {
3103
+ "epoch": 0.7973784817039868,
3104
+ "grad_norm": 4.872366428375244,
3105
+ "learning_rate": 1.176752156957886e-05,
3106
+ "loss": 2.4257,
3107
+ "step": 4380
3108
+ },
3109
+ {
3110
+ "epoch": 0.7991989805206626,
3111
+ "grad_norm": 5.488797664642334,
3112
+ "learning_rate": 1.1565537084514123e-05,
3113
+ "loss": 2.4424,
3114
+ "step": 4390
3115
+ },
3116
+ {
3117
+ "epoch": 0.8010194793373384,
3118
+ "grad_norm": 5.145867824554443,
3119
+ "learning_rate": 1.1365074183674468e-05,
3120
+ "loss": 2.4806,
3121
+ "step": 4400
3122
+ },
3123
+ {
3124
+ "epoch": 0.8028399781540142,
3125
+ "grad_norm": 5.343238353729248,
3126
+ "learning_rate": 1.116614080319754e-05,
3127
+ "loss": 2.4321,
3128
+ "step": 4410
3129
+ },
3130
+ {
3131
+ "epoch": 0.80466047697069,
3132
+ "grad_norm": 5.240965366363525,
3133
+ "learning_rate": 1.0968744818668691e-05,
3134
+ "loss": 2.4358,
3135
+ "step": 4420
3136
+ },
3137
+ {
3138
+ "epoch": 0.8064809757873658,
3139
+ "grad_norm": 5.5220513343811035,
3140
+ "learning_rate": 1.0772894044809229e-05,
3141
+ "loss": 2.442,
3142
+ "step": 4430
3143
+ },
3144
+ {
3145
+ "epoch": 0.8083014746040416,
3146
+ "grad_norm": 4.8629045486450195,
3147
+ "learning_rate": 1.0578596235166998e-05,
3148
+ "loss": 2.4567,
3149
+ "step": 4440
3150
+ },
3151
+ {
3152
+ "epoch": 0.8101219734207172,
3153
+ "grad_norm": 5.297680854797363,
3154
+ "learning_rate": 1.0385859081809508e-05,
3155
+ "loss": 2.4544,
3156
+ "step": 4450
3157
+ },
3158
+ {
3159
+ "epoch": 0.811942472237393,
3160
+ "grad_norm": 5.134615898132324,
3161
+ "learning_rate": 1.0194690215019292e-05,
3162
+ "loss": 2.4656,
3163
+ "step": 4460
3164
+ },
3165
+ {
3166
+ "epoch": 0.8137629710540688,
3167
+ "grad_norm": 5.012113571166992,
3168
+ "learning_rate": 1.0005097202991948e-05,
3169
+ "loss": 2.382,
3170
+ "step": 4470
3171
+ },
3172
+ {
3173
+ "epoch": 0.8155834698707446,
3174
+ "grad_norm": 5.369142532348633,
3175
+ "learning_rate": 9.817087551536414e-06,
3176
+ "loss": 2.4584,
3177
+ "step": 4480
3178
+ },
3179
+ {
3180
+ "epoch": 0.8174039686874204,
3181
+ "grad_norm": 5.545107841491699,
3182
+ "learning_rate": 9.630668703777922e-06,
3183
+ "loss": 2.4013,
3184
+ "step": 4490
3185
+ },
3186
+ {
3187
+ "epoch": 0.8192244675040962,
3188
+ "grad_norm": 4.933434963226318,
3189
+ "learning_rate": 9.445848039863252e-06,
3190
+ "loss": 2.4516,
3191
+ "step": 4500
3192
  }
3193
  ],
3194
  "logging_steps": 10,
 
3208
  "attributes": {}
3209
  }
3210
  },
3211
+ "total_flos": 1.052165096865792e+18,
3212
  "train_batch_size": 4,
3213
  "trial_name": null,
3214
  "trial_params": null