adricl's picture
Trained Model with 14gb/2 dataset
4ec02d1
{
"best_global_step": 15000,
"best_metric": 1.6942352056503296,
"best_model_checkpoint": "/workspace/traindata/data/HuggingFace_Mistral_Transformer_Single_Instrument/run/checkpoint-15000",
"epoch": 0.258492928782326,
"eval_steps": 3000,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0017232861918821732,
"grad_norm": 0.6133952736854553,
"learning_rate": 4.125e-06,
"loss": 1.5965,
"step": 100
},
{
"epoch": 0.0034465723837643464,
"grad_norm": 0.5784599184989929,
"learning_rate": 8.291666666666667e-06,
"loss": 1.5982,
"step": 200
},
{
"epoch": 0.005169858575646519,
"grad_norm": 0.5842740535736084,
"learning_rate": 1.2458333333333334e-05,
"loss": 1.5828,
"step": 300
},
{
"epoch": 0.006893144767528693,
"grad_norm": 0.5865280032157898,
"learning_rate": 1.6625e-05,
"loss": 1.5934,
"step": 400
},
{
"epoch": 0.008616430959410866,
"grad_norm": 0.6083072423934937,
"learning_rate": 2.0791666666666666e-05,
"loss": 1.6052,
"step": 500
},
{
"epoch": 0.010339717151293039,
"grad_norm": 0.5992451906204224,
"learning_rate": 2.4958333333333335e-05,
"loss": 1.5995,
"step": 600
},
{
"epoch": 0.012063003343175211,
"grad_norm": 0.6140967011451721,
"learning_rate": 2.9125000000000003e-05,
"loss": 1.5791,
"step": 700
},
{
"epoch": 0.013786289535057386,
"grad_norm": 0.6324509382247925,
"learning_rate": 3.329166666666667e-05,
"loss": 1.6014,
"step": 800
},
{
"epoch": 0.015509575726939558,
"grad_norm": 0.6500518918037415,
"learning_rate": 3.7458333333333334e-05,
"loss": 1.6042,
"step": 900
},
{
"epoch": 0.017232861918821733,
"grad_norm": 0.6341643929481506,
"learning_rate": 4.1625e-05,
"loss": 1.5796,
"step": 1000
},
{
"epoch": 0.018956148110703903,
"grad_norm": 0.6603251099586487,
"learning_rate": 4.579166666666667e-05,
"loss": 1.5855,
"step": 1100
},
{
"epoch": 0.020679434302586078,
"grad_norm": 0.7315922379493713,
"learning_rate": 4.995833333333333e-05,
"loss": 1.5976,
"step": 1200
},
{
"epoch": 0.022402720494468252,
"grad_norm": 0.6418182849884033,
"learning_rate": 5.4125e-05,
"loss": 1.5834,
"step": 1300
},
{
"epoch": 0.024126006686350423,
"grad_norm": 0.6903438568115234,
"learning_rate": 5.829166666666667e-05,
"loss": 1.6235,
"step": 1400
},
{
"epoch": 0.025849292878232597,
"grad_norm": 0.6109316945075989,
"learning_rate": 6.245833333333334e-05,
"loss": 1.6143,
"step": 1500
},
{
"epoch": 0.02757257907011477,
"grad_norm": 0.6458160281181335,
"learning_rate": 6.6625e-05,
"loss": 1.609,
"step": 1600
},
{
"epoch": 0.029295865261996946,
"grad_norm": 0.6940888166427612,
"learning_rate": 7.079166666666666e-05,
"loss": 1.6048,
"step": 1700
},
{
"epoch": 0.031019151453879117,
"grad_norm": 0.6740911602973938,
"learning_rate": 7.495833333333334e-05,
"loss": 1.6116,
"step": 1800
},
{
"epoch": 0.03274243764576129,
"grad_norm": 0.634560763835907,
"learning_rate": 7.9125e-05,
"loss": 1.5999,
"step": 1900
},
{
"epoch": 0.034465723837643465,
"grad_norm": 0.677970826625824,
"learning_rate": 8.329166666666667e-05,
"loss": 1.6104,
"step": 2000
},
{
"epoch": 0.03618901002952564,
"grad_norm": 0.6901321411132812,
"learning_rate": 8.745833333333334e-05,
"loss": 1.6018,
"step": 2100
},
{
"epoch": 0.03791229622140781,
"grad_norm": 0.6881032586097717,
"learning_rate": 9.1625e-05,
"loss": 1.6303,
"step": 2200
},
{
"epoch": 0.03963558241328998,
"grad_norm": 0.6821079254150391,
"learning_rate": 9.579166666666667e-05,
"loss": 1.6207,
"step": 2300
},
{
"epoch": 0.041358868605172155,
"grad_norm": 0.7254959940910339,
"learning_rate": 9.995833333333334e-05,
"loss": 1.6106,
"step": 2400
},
{
"epoch": 0.04308215479705433,
"grad_norm": 0.7417749166488647,
"learning_rate": 9.99968254119042e-05,
"loss": 1.6141,
"step": 2500
},
{
"epoch": 0.044805440988936504,
"grad_norm": 0.6578373312950134,
"learning_rate": 9.998717347022716e-05,
"loss": 1.6214,
"step": 2600
},
{
"epoch": 0.04652872718081868,
"grad_norm": 0.6432535648345947,
"learning_rate": 9.997104510158365e-05,
"loss": 1.6303,
"step": 2700
},
{
"epoch": 0.048252013372700846,
"grad_norm": 0.6907160878181458,
"learning_rate": 9.994844239559375e-05,
"loss": 1.6105,
"step": 2800
},
{
"epoch": 0.04997529956458302,
"grad_norm": 0.7411105036735535,
"learning_rate": 9.991936828070421e-05,
"loss": 1.629,
"step": 2900
},
{
"epoch": 0.051698585756465194,
"grad_norm": 0.6869089603424072,
"learning_rate": 9.988382652380897e-05,
"loss": 1.6249,
"step": 3000
},
{
"epoch": 0.05342187194834737,
"grad_norm": 0.662797212600708,
"learning_rate": 9.984182172976115e-05,
"loss": 1.633,
"step": 3100
},
{
"epoch": 0.05514515814022954,
"grad_norm": 0.6771135926246643,
"learning_rate": 9.979335934077652e-05,
"loss": 1.6243,
"step": 3200
},
{
"epoch": 0.05686844433211172,
"grad_norm": 0.6237235069274902,
"learning_rate": 9.97384456357282e-05,
"loss": 1.6184,
"step": 3300
},
{
"epoch": 0.05859173052399389,
"grad_norm": 0.6165594458580017,
"learning_rate": 9.967708772933339e-05,
"loss": 1.6178,
"step": 3400
},
{
"epoch": 0.06031501671587606,
"grad_norm": 0.6119577884674072,
"learning_rate": 9.960929357123137e-05,
"loss": 1.6171,
"step": 3500
},
{
"epoch": 0.06203830290775823,
"grad_norm": 0.6392346024513245,
"learning_rate": 9.953507194495366e-05,
"loss": 1.6283,
"step": 3600
},
{
"epoch": 0.06376158909964041,
"grad_norm": 0.7036736607551575,
"learning_rate": 9.945443246678599e-05,
"loss": 1.6278,
"step": 3700
},
{
"epoch": 0.06548487529152258,
"grad_norm": 0.7011469006538391,
"learning_rate": 9.936738558452233e-05,
"loss": 1.6087,
"step": 3800
},
{
"epoch": 0.06720816148340475,
"grad_norm": 0.6176936030387878,
"learning_rate": 9.927394257611137e-05,
"loss": 1.6285,
"step": 3900
},
{
"epoch": 0.06893144767528693,
"grad_norm": 0.6255926489830017,
"learning_rate": 9.91741155481952e-05,
"loss": 1.618,
"step": 4000
},
{
"epoch": 0.0706547338671691,
"grad_norm": 0.6118465065956116,
"learning_rate": 9.906791743454082e-05,
"loss": 1.629,
"step": 4100
},
{
"epoch": 0.07237802005905128,
"grad_norm": 0.6299500465393066,
"learning_rate": 9.895536199436449e-05,
"loss": 1.6328,
"step": 4200
},
{
"epoch": 0.07410130625093345,
"grad_norm": 0.7682228684425354,
"learning_rate": 9.883646381054886e-05,
"loss": 1.5985,
"step": 4300
},
{
"epoch": 0.07582459244281561,
"grad_norm": 0.5980575084686279,
"learning_rate": 9.871123828775381e-05,
"loss": 1.6114,
"step": 4400
},
{
"epoch": 0.0775478786346978,
"grad_norm": 0.5819905400276184,
"learning_rate": 9.857970165042046e-05,
"loss": 1.622,
"step": 4500
},
{
"epoch": 0.07927116482657996,
"grad_norm": 0.8043591976165771,
"learning_rate": 9.844187094066913e-05,
"loss": 1.5978,
"step": 4600
},
{
"epoch": 0.08099445101846214,
"grad_norm": 0.6172861456871033,
"learning_rate": 9.829776401609134e-05,
"loss": 1.5887,
"step": 4700
},
{
"epoch": 0.08271773721034431,
"grad_norm": 0.6270127296447754,
"learning_rate": 9.814739954743617e-05,
"loss": 1.5971,
"step": 4800
},
{
"epoch": 0.08444102340222649,
"grad_norm": 0.6117558479309082,
"learning_rate": 9.79907970161912e-05,
"loss": 1.6033,
"step": 4900
},
{
"epoch": 0.08616430959410866,
"grad_norm": 0.6499077677726746,
"learning_rate": 9.78279767120585e-05,
"loss": 1.6129,
"step": 5000
},
{
"epoch": 0.08788759578599083,
"grad_norm": 0.5596637725830078,
"learning_rate": 9.76589597303258e-05,
"loss": 1.6211,
"step": 5100
},
{
"epoch": 0.08961088197787301,
"grad_norm": 0.5757789015769958,
"learning_rate": 9.748376796913344e-05,
"loss": 1.6225,
"step": 5200
},
{
"epoch": 0.09133416816975518,
"grad_norm": 0.6331895589828491,
"learning_rate": 9.730242412663709e-05,
"loss": 1.5732,
"step": 5300
},
{
"epoch": 0.09305745436163736,
"grad_norm": 0.5809708833694458,
"learning_rate": 9.711495169806705e-05,
"loss": 1.6233,
"step": 5400
},
{
"epoch": 0.09478074055351952,
"grad_norm": 0.8100622296333313,
"learning_rate": 9.69213749726841e-05,
"loss": 1.6118,
"step": 5500
},
{
"epoch": 0.09650402674540169,
"grad_norm": 0.5590764284133911,
"learning_rate": 9.672171903063253e-05,
"loss": 1.5967,
"step": 5600
},
{
"epoch": 0.09822731293728387,
"grad_norm": 0.5601252317428589,
"learning_rate": 9.651600973969077e-05,
"loss": 1.6189,
"step": 5700
},
{
"epoch": 0.09995059912916604,
"grad_norm": 0.5881779193878174,
"learning_rate": 9.630427375191989e-05,
"loss": 1.6133,
"step": 5800
},
{
"epoch": 0.10167388532104822,
"grad_norm": 0.5713782906532288,
"learning_rate": 9.608653850021045e-05,
"loss": 1.5912,
"step": 5900
},
{
"epoch": 0.10339717151293039,
"grad_norm": 0.5922852754592896,
"learning_rate": 9.586283219472836e-05,
"loss": 1.6022,
"step": 6000
},
{
"epoch": 0.10339717151293039,
"eval_accuracy": 0.010507585123200762,
"eval_loss": 1.751858115196228,
"eval_runtime": 1766.3832,
"eval_samples_per_second": 481.157,
"eval_steps_per_second": 15.036,
"step": 6000
},
{
"epoch": 0.10512045770481257,
"grad_norm": 0.5831886529922485,
"learning_rate": 9.563318381925982e-05,
"loss": 1.5952,
"step": 6100
},
{
"epoch": 0.10684374389669474,
"grad_norm": 0.6007715463638306,
"learning_rate": 9.539762312745619e-05,
"loss": 1.6048,
"step": 6200
},
{
"epoch": 0.1085670300885769,
"grad_norm": 0.6652824282646179,
"learning_rate": 9.515618063897906e-05,
"loss": 1.6103,
"step": 6300
},
{
"epoch": 0.11029031628045909,
"grad_norm": 0.5715838670730591,
"learning_rate": 9.490888763554596e-05,
"loss": 1.5917,
"step": 6400
},
{
"epoch": 0.11201360247234125,
"grad_norm": 0.5438059568405151,
"learning_rate": 9.465577615687761e-05,
"loss": 1.6222,
"step": 6500
},
{
"epoch": 0.11373688866422343,
"grad_norm": 0.5527771711349487,
"learning_rate": 9.43968789965467e-05,
"loss": 1.588,
"step": 6600
},
{
"epoch": 0.1154601748561056,
"grad_norm": 0.6176398396492004,
"learning_rate": 9.413222969772906e-05,
"loss": 1.5747,
"step": 6700
},
{
"epoch": 0.11718346104798778,
"grad_norm": 0.589585542678833,
"learning_rate": 9.386186254885783e-05,
"loss": 1.5938,
"step": 6800
},
{
"epoch": 0.11890674723986995,
"grad_norm": 0.5356398820877075,
"learning_rate": 9.3585812579181e-05,
"loss": 1.5822,
"step": 6900
},
{
"epoch": 0.12063003343175212,
"grad_norm": 0.6897734999656677,
"learning_rate": 9.330411555422285e-05,
"loss": 1.5982,
"step": 7000
},
{
"epoch": 0.1223533196236343,
"grad_norm": 0.5214916467666626,
"learning_rate": 9.30168079711502e-05,
"loss": 1.5898,
"step": 7100
},
{
"epoch": 0.12407660581551647,
"grad_norm": 0.5990198850631714,
"learning_rate": 9.272392705404372e-05,
"loss": 1.6032,
"step": 7200
},
{
"epoch": 0.12579989200739863,
"grad_norm": 0.531247079372406,
"learning_rate": 9.242551074907519e-05,
"loss": 1.6082,
"step": 7300
},
{
"epoch": 0.12752317819928083,
"grad_norm": 0.607933521270752,
"learning_rate": 9.212159771959101e-05,
"loss": 1.5817,
"step": 7400
},
{
"epoch": 0.129246464391163,
"grad_norm": 0.5464344024658203,
"learning_rate": 9.181222734110301e-05,
"loss": 1.5759,
"step": 7500
},
{
"epoch": 0.13096975058304516,
"grad_norm": 0.6487947106361389,
"learning_rate": 9.149743969618683e-05,
"loss": 1.6067,
"step": 7600
},
{
"epoch": 0.13269303677492733,
"grad_norm": 0.556429922580719,
"learning_rate": 9.117727556928875e-05,
"loss": 1.5863,
"step": 7700
},
{
"epoch": 0.1344163229668095,
"grad_norm": 0.5772918462753296,
"learning_rate": 9.085177644144167e-05,
"loss": 1.5888,
"step": 7800
},
{
"epoch": 0.1361396091586917,
"grad_norm": 0.5730582475662231,
"learning_rate": 9.052098448489062e-05,
"loss": 1.5983,
"step": 7900
},
{
"epoch": 0.13786289535057386,
"grad_norm": 0.5221332311630249,
"learning_rate": 9.018494255762894e-05,
"loss": 1.5757,
"step": 8000
},
{
"epoch": 0.13958618154245603,
"grad_norm": 0.5817165970802307,
"learning_rate": 8.98436941978455e-05,
"loss": 1.5813,
"step": 8100
},
{
"epoch": 0.1413094677343382,
"grad_norm": 0.5672810673713684,
"learning_rate": 8.949728361828381e-05,
"loss": 1.5997,
"step": 8200
},
{
"epoch": 0.14303275392622036,
"grad_norm": 0.5768831372261047,
"learning_rate": 8.914575570051375e-05,
"loss": 1.5707,
"step": 8300
},
{
"epoch": 0.14475604011810256,
"grad_norm": 0.6032638549804688,
"learning_rate": 8.878915598911664e-05,
"loss": 1.5892,
"step": 8400
},
{
"epoch": 0.14647932630998473,
"grad_norm": 0.5976369976997375,
"learning_rate": 8.842753068578434e-05,
"loss": 1.5996,
"step": 8500
},
{
"epoch": 0.1482026125018669,
"grad_norm": 0.559442400932312,
"learning_rate": 8.806092664333333e-05,
"loss": 1.5813,
"step": 8600
},
{
"epoch": 0.14992589869374906,
"grad_norm": 0.516207218170166,
"learning_rate": 8.768939135963447e-05,
"loss": 1.5742,
"step": 8700
},
{
"epoch": 0.15164918488563123,
"grad_norm": 0.553333044052124,
"learning_rate": 8.731297297145889e-05,
"loss": 1.585,
"step": 8800
},
{
"epoch": 0.15337247107751342,
"grad_norm": 0.5709084868431091,
"learning_rate": 8.693172024824143e-05,
"loss": 1.5811,
"step": 8900
},
{
"epoch": 0.1550957572693956,
"grad_norm": 0.52576744556427,
"learning_rate": 8.654568258576197e-05,
"loss": 1.5843,
"step": 9000
},
{
"epoch": 0.1550957572693956,
"eval_accuracy": 0.010426478228323498,
"eval_loss": 1.732275128364563,
"eval_runtime": 1695.9476,
"eval_samples_per_second": 501.14,
"eval_steps_per_second": 15.661,
"step": 9000
},
{
"epoch": 0.15681904346127776,
"grad_norm": 0.5634833574295044,
"learning_rate": 8.615490999974563e-05,
"loss": 1.5927,
"step": 9100
},
{
"epoch": 0.15854232965315992,
"grad_norm": 0.5738709568977356,
"learning_rate": 8.575945311938262e-05,
"loss": 1.6131,
"step": 9200
},
{
"epoch": 0.1602656158450421,
"grad_norm": 0.5828307867050171,
"learning_rate": 8.535936318076864e-05,
"loss": 1.5766,
"step": 9300
},
{
"epoch": 0.1619889020369243,
"grad_norm": 0.580729603767395,
"learning_rate": 8.495469202026669e-05,
"loss": 1.5902,
"step": 9400
},
{
"epoch": 0.16371218822880645,
"grad_norm": 0.568894624710083,
"learning_rate": 8.454549206779092e-05,
"loss": 1.5671,
"step": 9500
},
{
"epoch": 0.16543547442068862,
"grad_norm": 0.5564482808113098,
"learning_rate": 8.413181634001391e-05,
"loss": 1.5778,
"step": 9600
},
{
"epoch": 0.1671587606125708,
"grad_norm": 0.5514076948165894,
"learning_rate": 8.371371843349755e-05,
"loss": 1.5874,
"step": 9700
},
{
"epoch": 0.16888204680445298,
"grad_norm": 0.5865207314491272,
"learning_rate": 8.329125251774916e-05,
"loss": 1.5637,
"step": 9800
},
{
"epoch": 0.17060533299633515,
"grad_norm": 0.5577490329742432,
"learning_rate": 8.286447332820298e-05,
"loss": 1.5801,
"step": 9900
},
{
"epoch": 0.17232861918821732,
"grad_norm": 0.5502321124076843,
"learning_rate": 8.243343615912877e-05,
"loss": 1.5695,
"step": 10000
},
{
"epoch": 0.1740519053800995,
"grad_norm": 0.5614681243896484,
"learning_rate": 8.199819685646759e-05,
"loss": 1.5892,
"step": 10100
},
{
"epoch": 0.17577519157198165,
"grad_norm": 0.5736984014511108,
"learning_rate": 8.155881181059644e-05,
"loss": 1.5911,
"step": 10200
},
{
"epoch": 0.17749847776386385,
"grad_norm": 0.49306830763816833,
"learning_rate": 8.111533794902217e-05,
"loss": 1.5481,
"step": 10300
},
{
"epoch": 0.17922176395574602,
"grad_norm": 0.5843108892440796,
"learning_rate": 8.066783272900586e-05,
"loss": 1.582,
"step": 10400
},
{
"epoch": 0.18094505014762818,
"grad_norm": 0.5754996538162231,
"learning_rate": 8.02163541301185e-05,
"loss": 1.5885,
"step": 10500
},
{
"epoch": 0.18266833633951035,
"grad_norm": 0.5479796528816223,
"learning_rate": 7.976096064672915e-05,
"loss": 1.5693,
"step": 10600
},
{
"epoch": 0.18439162253139252,
"grad_norm": 0.5987735390663147,
"learning_rate": 7.930171128042627e-05,
"loss": 1.5679,
"step": 10700
},
{
"epoch": 0.1861149087232747,
"grad_norm": 0.5608052611351013,
"learning_rate": 7.88386655323733e-05,
"loss": 1.5559,
"step": 10800
},
{
"epoch": 0.18783819491515688,
"grad_norm": 0.5474194288253784,
"learning_rate": 7.83718833955997e-05,
"loss": 1.5846,
"step": 10900
},
{
"epoch": 0.18956148110703905,
"grad_norm": 0.5139473676681519,
"learning_rate": 7.790142534722805e-05,
"loss": 1.5715,
"step": 11000
},
{
"epoch": 0.19128476729892122,
"grad_norm": 0.5175371170043945,
"learning_rate": 7.742735234063859e-05,
"loss": 1.5864,
"step": 11100
},
{
"epoch": 0.19300805349080338,
"grad_norm": 0.5598956942558289,
"learning_rate": 7.694972579757193e-05,
"loss": 1.5373,
"step": 11200
},
{
"epoch": 0.19473133968268558,
"grad_norm": 0.5191853642463684,
"learning_rate": 7.646860760017124e-05,
"loss": 1.5573,
"step": 11300
},
{
"epoch": 0.19645462587456775,
"grad_norm": 0.5062898993492126,
"learning_rate": 7.598406008296456e-05,
"loss": 1.5797,
"step": 11400
},
{
"epoch": 0.1981779120664499,
"grad_norm": 0.5880659222602844,
"learning_rate": 7.549614602478872e-05,
"loss": 1.558,
"step": 11500
},
{
"epoch": 0.19990119825833208,
"grad_norm": 0.5346918702125549,
"learning_rate": 7.500492864065559e-05,
"loss": 1.562,
"step": 11600
},
{
"epoch": 0.20162448445021428,
"grad_norm": 0.5520205497741699,
"learning_rate": 7.451047157356182e-05,
"loss": 1.5458,
"step": 11700
},
{
"epoch": 0.20334777064209644,
"grad_norm": 0.5452098250389099,
"learning_rate": 7.401283888624307e-05,
"loss": 1.5783,
"step": 11800
},
{
"epoch": 0.2050710568339786,
"grad_norm": 0.5486232042312622,
"learning_rate": 7.351209505287412e-05,
"loss": 1.5635,
"step": 11900
},
{
"epoch": 0.20679434302586078,
"grad_norm": 0.5769017934799194,
"learning_rate": 7.300830495071524e-05,
"loss": 1.5473,
"step": 12000
},
{
"epoch": 0.20679434302586078,
"eval_accuracy": 0.010505145107655028,
"eval_loss": 1.7127723693847656,
"eval_runtime": 1714.6588,
"eval_samples_per_second": 495.671,
"eval_steps_per_second": 15.49,
"step": 12000
},
{
"epoch": 0.20851762921774294,
"grad_norm": 0.5299004912376404,
"learning_rate": 7.250153385170675e-05,
"loss": 1.5631,
"step": 12100
},
{
"epoch": 0.21024091540962514,
"grad_norm": 0.6350430250167847,
"learning_rate": 7.199184741401222e-05,
"loss": 1.5484,
"step": 12200
},
{
"epoch": 0.2119642016015073,
"grad_norm": 0.5689346790313721,
"learning_rate": 7.147931167351162e-05,
"loss": 1.5616,
"step": 12300
},
{
"epoch": 0.21368748779338947,
"grad_norm": 0.5793879628181458,
"learning_rate": 7.096399303524577e-05,
"loss": 1.5496,
"step": 12400
},
{
"epoch": 0.21541077398527164,
"grad_norm": 0.5497804284095764,
"learning_rate": 7.044595826481253e-05,
"loss": 1.5667,
"step": 12500
},
{
"epoch": 0.2171340601771538,
"grad_norm": 0.5804843902587891,
"learning_rate": 6.992527447971677e-05,
"loss": 1.5586,
"step": 12600
},
{
"epoch": 0.218857346369036,
"grad_norm": 0.5805226564407349,
"learning_rate": 6.940200914067431e-05,
"loss": 1.5428,
"step": 12700
},
{
"epoch": 0.22058063256091817,
"grad_norm": 0.5112205743789673,
"learning_rate": 6.887623004287182e-05,
"loss": 1.5597,
"step": 12800
},
{
"epoch": 0.22230391875280034,
"grad_norm": 0.5555017590522766,
"learning_rate": 6.834800530718285e-05,
"loss": 1.5349,
"step": 12900
},
{
"epoch": 0.2240272049446825,
"grad_norm": 0.5393018126487732,
"learning_rate": 6.781740337134229e-05,
"loss": 1.5392,
"step": 13000
},
{
"epoch": 0.22575049113656467,
"grad_norm": 0.515864372253418,
"learning_rate": 6.728449298107919e-05,
"loss": 1.5617,
"step": 13100
},
{
"epoch": 0.22747377732844687,
"grad_norm": 0.5203471779823303,
"learning_rate": 6.674934318121013e-05,
"loss": 1.5492,
"step": 13200
},
{
"epoch": 0.22919706352032904,
"grad_norm": 0.5489692091941833,
"learning_rate": 6.621202330669354e-05,
"loss": 1.544,
"step": 13300
},
{
"epoch": 0.2309203497122112,
"grad_norm": 0.5596274137496948,
"learning_rate": 6.567260297364659e-05,
"loss": 1.5463,
"step": 13400
},
{
"epoch": 0.23264363590409337,
"grad_norm": 0.5610251426696777,
"learning_rate": 6.513115207032557e-05,
"loss": 1.5802,
"step": 13500
},
{
"epoch": 0.23436692209597557,
"grad_norm": 0.5264619588851929,
"learning_rate": 6.458774074807107e-05,
"loss": 1.5545,
"step": 13600
},
{
"epoch": 0.23609020828785773,
"grad_norm": 0.5814141631126404,
"learning_rate": 6.404243941221903e-05,
"loss": 1.5521,
"step": 13700
},
{
"epoch": 0.2378134944797399,
"grad_norm": 0.5240880846977234,
"learning_rate": 6.349531871297896e-05,
"loss": 1.5675,
"step": 13800
},
{
"epoch": 0.23953678067162207,
"grad_norm": 0.4984951913356781,
"learning_rate": 6.294644953628023e-05,
"loss": 1.5559,
"step": 13900
},
{
"epoch": 0.24126006686350424,
"grad_norm": 0.5289067029953003,
"learning_rate": 6.239590299458814e-05,
"loss": 1.5285,
"step": 14000
},
{
"epoch": 0.24298335305538643,
"grad_norm": 0.5221706032752991,
"learning_rate": 6.184375041769032e-05,
"loss": 1.553,
"step": 14100
},
{
"epoch": 0.2447066392472686,
"grad_norm": 0.5475857257843018,
"learning_rate": 6.12900633434552e-05,
"loss": 1.5675,
"step": 14200
},
{
"epoch": 0.24642992543915077,
"grad_norm": 0.5271047353744507,
"learning_rate": 6.0734913508563395e-05,
"loss": 1.5487,
"step": 14300
},
{
"epoch": 0.24815321163103293,
"grad_norm": 0.5180040001869202,
"learning_rate": 6.0178372839213406e-05,
"loss": 1.5281,
"step": 14400
},
{
"epoch": 0.2498764978229151,
"grad_norm": 0.566608726978302,
"learning_rate": 5.9620513441802714e-05,
"loss": 1.5602,
"step": 14500
},
{
"epoch": 0.25159978401479727,
"grad_norm": 0.5131779909133911,
"learning_rate": 5.906140759358555e-05,
"loss": 1.5111,
"step": 14600
},
{
"epoch": 0.25332307020667943,
"grad_norm": 0.5626484751701355,
"learning_rate": 5.85011277333085e-05,
"loss": 1.5528,
"step": 14700
},
{
"epoch": 0.25504635639856166,
"grad_norm": 0.550121545791626,
"learning_rate": 5.793974645182526e-05,
"loss": 1.5401,
"step": 14800
},
{
"epoch": 0.2567696425904438,
"grad_norm": 0.5408352017402649,
"learning_rate": 5.737733648269162e-05,
"loss": 1.5437,
"step": 14900
},
{
"epoch": 0.258492928782326,
"grad_norm": 0.5391642451286316,
"learning_rate": 5.6813970692741945e-05,
"loss": 1.5407,
"step": 15000
},
{
"epoch": 0.258492928782326,
"eval_accuracy": 0.010407141521982707,
"eval_loss": 1.6942352056503296,
"eval_runtime": 1748.5708,
"eval_samples_per_second": 486.058,
"eval_steps_per_second": 15.19,
"step": 15000
}
],
"logging_steps": 100,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.041448973814661e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}