| { | |
| "best_global_step": 15000, | |
| "best_metric": 1.6942352056503296, | |
| "best_model_checkpoint": "/workspace/traindata/data/HuggingFace_Mistral_Transformer_Single_Instrument/run/checkpoint-15000", | |
| "epoch": 0.258492928782326, | |
| "eval_steps": 3000, | |
| "global_step": 15000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0017232861918821732, | |
| "grad_norm": 0.6133952736854553, | |
| "learning_rate": 4.125e-06, | |
| "loss": 1.5965, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0034465723837643464, | |
| "grad_norm": 0.5784599184989929, | |
| "learning_rate": 8.291666666666667e-06, | |
| "loss": 1.5982, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.005169858575646519, | |
| "grad_norm": 0.5842740535736084, | |
| "learning_rate": 1.2458333333333334e-05, | |
| "loss": 1.5828, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.006893144767528693, | |
| "grad_norm": 0.5865280032157898, | |
| "learning_rate": 1.6625e-05, | |
| "loss": 1.5934, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.008616430959410866, | |
| "grad_norm": 0.6083072423934937, | |
| "learning_rate": 2.0791666666666666e-05, | |
| "loss": 1.6052, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.010339717151293039, | |
| "grad_norm": 0.5992451906204224, | |
| "learning_rate": 2.4958333333333335e-05, | |
| "loss": 1.5995, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.012063003343175211, | |
| "grad_norm": 0.6140967011451721, | |
| "learning_rate": 2.9125000000000003e-05, | |
| "loss": 1.5791, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.013786289535057386, | |
| "grad_norm": 0.6324509382247925, | |
| "learning_rate": 3.329166666666667e-05, | |
| "loss": 1.6014, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.015509575726939558, | |
| "grad_norm": 0.6500518918037415, | |
| "learning_rate": 3.7458333333333334e-05, | |
| "loss": 1.6042, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.017232861918821733, | |
| "grad_norm": 0.6341643929481506, | |
| "learning_rate": 4.1625e-05, | |
| "loss": 1.5796, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.018956148110703903, | |
| "grad_norm": 0.6603251099586487, | |
| "learning_rate": 4.579166666666667e-05, | |
| "loss": 1.5855, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.020679434302586078, | |
| "grad_norm": 0.7315922379493713, | |
| "learning_rate": 4.995833333333333e-05, | |
| "loss": 1.5976, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.022402720494468252, | |
| "grad_norm": 0.6418182849884033, | |
| "learning_rate": 5.4125e-05, | |
| "loss": 1.5834, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.024126006686350423, | |
| "grad_norm": 0.6903438568115234, | |
| "learning_rate": 5.829166666666667e-05, | |
| "loss": 1.6235, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.025849292878232597, | |
| "grad_norm": 0.6109316945075989, | |
| "learning_rate": 6.245833333333334e-05, | |
| "loss": 1.6143, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.02757257907011477, | |
| "grad_norm": 0.6458160281181335, | |
| "learning_rate": 6.6625e-05, | |
| "loss": 1.609, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.029295865261996946, | |
| "grad_norm": 0.6940888166427612, | |
| "learning_rate": 7.079166666666666e-05, | |
| "loss": 1.6048, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.031019151453879117, | |
| "grad_norm": 0.6740911602973938, | |
| "learning_rate": 7.495833333333334e-05, | |
| "loss": 1.6116, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.03274243764576129, | |
| "grad_norm": 0.634560763835907, | |
| "learning_rate": 7.9125e-05, | |
| "loss": 1.5999, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.034465723837643465, | |
| "grad_norm": 0.677970826625824, | |
| "learning_rate": 8.329166666666667e-05, | |
| "loss": 1.6104, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.03618901002952564, | |
| "grad_norm": 0.6901321411132812, | |
| "learning_rate": 8.745833333333334e-05, | |
| "loss": 1.6018, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.03791229622140781, | |
| "grad_norm": 0.6881032586097717, | |
| "learning_rate": 9.1625e-05, | |
| "loss": 1.6303, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.03963558241328998, | |
| "grad_norm": 0.6821079254150391, | |
| "learning_rate": 9.579166666666667e-05, | |
| "loss": 1.6207, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.041358868605172155, | |
| "grad_norm": 0.7254959940910339, | |
| "learning_rate": 9.995833333333334e-05, | |
| "loss": 1.6106, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.04308215479705433, | |
| "grad_norm": 0.7417749166488647, | |
| "learning_rate": 9.99968254119042e-05, | |
| "loss": 1.6141, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.044805440988936504, | |
| "grad_norm": 0.6578373312950134, | |
| "learning_rate": 9.998717347022716e-05, | |
| "loss": 1.6214, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.04652872718081868, | |
| "grad_norm": 0.6432535648345947, | |
| "learning_rate": 9.997104510158365e-05, | |
| "loss": 1.6303, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.048252013372700846, | |
| "grad_norm": 0.6907160878181458, | |
| "learning_rate": 9.994844239559375e-05, | |
| "loss": 1.6105, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.04997529956458302, | |
| "grad_norm": 0.7411105036735535, | |
| "learning_rate": 9.991936828070421e-05, | |
| "loss": 1.629, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.051698585756465194, | |
| "grad_norm": 0.6869089603424072, | |
| "learning_rate": 9.988382652380897e-05, | |
| "loss": 1.6249, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.05342187194834737, | |
| "grad_norm": 0.662797212600708, | |
| "learning_rate": 9.984182172976115e-05, | |
| "loss": 1.633, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.05514515814022954, | |
| "grad_norm": 0.6771135926246643, | |
| "learning_rate": 9.979335934077652e-05, | |
| "loss": 1.6243, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.05686844433211172, | |
| "grad_norm": 0.6237235069274902, | |
| "learning_rate": 9.97384456357282e-05, | |
| "loss": 1.6184, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.05859173052399389, | |
| "grad_norm": 0.6165594458580017, | |
| "learning_rate": 9.967708772933339e-05, | |
| "loss": 1.6178, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.06031501671587606, | |
| "grad_norm": 0.6119577884674072, | |
| "learning_rate": 9.960929357123137e-05, | |
| "loss": 1.6171, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.06203830290775823, | |
| "grad_norm": 0.6392346024513245, | |
| "learning_rate": 9.953507194495366e-05, | |
| "loss": 1.6283, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.06376158909964041, | |
| "grad_norm": 0.7036736607551575, | |
| "learning_rate": 9.945443246678599e-05, | |
| "loss": 1.6278, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.06548487529152258, | |
| "grad_norm": 0.7011469006538391, | |
| "learning_rate": 9.936738558452233e-05, | |
| "loss": 1.6087, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.06720816148340475, | |
| "grad_norm": 0.6176936030387878, | |
| "learning_rate": 9.927394257611137e-05, | |
| "loss": 1.6285, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.06893144767528693, | |
| "grad_norm": 0.6255926489830017, | |
| "learning_rate": 9.91741155481952e-05, | |
| "loss": 1.618, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.0706547338671691, | |
| "grad_norm": 0.6118465065956116, | |
| "learning_rate": 9.906791743454082e-05, | |
| "loss": 1.629, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.07237802005905128, | |
| "grad_norm": 0.6299500465393066, | |
| "learning_rate": 9.895536199436449e-05, | |
| "loss": 1.6328, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.07410130625093345, | |
| "grad_norm": 0.7682228684425354, | |
| "learning_rate": 9.883646381054886e-05, | |
| "loss": 1.5985, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.07582459244281561, | |
| "grad_norm": 0.5980575084686279, | |
| "learning_rate": 9.871123828775381e-05, | |
| "loss": 1.6114, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.0775478786346978, | |
| "grad_norm": 0.5819905400276184, | |
| "learning_rate": 9.857970165042046e-05, | |
| "loss": 1.622, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.07927116482657996, | |
| "grad_norm": 0.8043591976165771, | |
| "learning_rate": 9.844187094066913e-05, | |
| "loss": 1.5978, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.08099445101846214, | |
| "grad_norm": 0.6172861456871033, | |
| "learning_rate": 9.829776401609134e-05, | |
| "loss": 1.5887, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.08271773721034431, | |
| "grad_norm": 0.6270127296447754, | |
| "learning_rate": 9.814739954743617e-05, | |
| "loss": 1.5971, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.08444102340222649, | |
| "grad_norm": 0.6117558479309082, | |
| "learning_rate": 9.79907970161912e-05, | |
| "loss": 1.6033, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.08616430959410866, | |
| "grad_norm": 0.6499077677726746, | |
| "learning_rate": 9.78279767120585e-05, | |
| "loss": 1.6129, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.08788759578599083, | |
| "grad_norm": 0.5596637725830078, | |
| "learning_rate": 9.76589597303258e-05, | |
| "loss": 1.6211, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.08961088197787301, | |
| "grad_norm": 0.5757789015769958, | |
| "learning_rate": 9.748376796913344e-05, | |
| "loss": 1.6225, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.09133416816975518, | |
| "grad_norm": 0.6331895589828491, | |
| "learning_rate": 9.730242412663709e-05, | |
| "loss": 1.5732, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.09305745436163736, | |
| "grad_norm": 0.5809708833694458, | |
| "learning_rate": 9.711495169806705e-05, | |
| "loss": 1.6233, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.09478074055351952, | |
| "grad_norm": 0.8100622296333313, | |
| "learning_rate": 9.69213749726841e-05, | |
| "loss": 1.6118, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.09650402674540169, | |
| "grad_norm": 0.5590764284133911, | |
| "learning_rate": 9.672171903063253e-05, | |
| "loss": 1.5967, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.09822731293728387, | |
| "grad_norm": 0.5601252317428589, | |
| "learning_rate": 9.651600973969077e-05, | |
| "loss": 1.6189, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.09995059912916604, | |
| "grad_norm": 0.5881779193878174, | |
| "learning_rate": 9.630427375191989e-05, | |
| "loss": 1.6133, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.10167388532104822, | |
| "grad_norm": 0.5713782906532288, | |
| "learning_rate": 9.608653850021045e-05, | |
| "loss": 1.5912, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.10339717151293039, | |
| "grad_norm": 0.5922852754592896, | |
| "learning_rate": 9.586283219472836e-05, | |
| "loss": 1.6022, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.10339717151293039, | |
| "eval_accuracy": 0.010507585123200762, | |
| "eval_loss": 1.751858115196228, | |
| "eval_runtime": 1766.3832, | |
| "eval_samples_per_second": 481.157, | |
| "eval_steps_per_second": 15.036, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.10512045770481257, | |
| "grad_norm": 0.5831886529922485, | |
| "learning_rate": 9.563318381925982e-05, | |
| "loss": 1.5952, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.10684374389669474, | |
| "grad_norm": 0.6007715463638306, | |
| "learning_rate": 9.539762312745619e-05, | |
| "loss": 1.6048, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.1085670300885769, | |
| "grad_norm": 0.6652824282646179, | |
| "learning_rate": 9.515618063897906e-05, | |
| "loss": 1.6103, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.11029031628045909, | |
| "grad_norm": 0.5715838670730591, | |
| "learning_rate": 9.490888763554596e-05, | |
| "loss": 1.5917, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.11201360247234125, | |
| "grad_norm": 0.5438059568405151, | |
| "learning_rate": 9.465577615687761e-05, | |
| "loss": 1.6222, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.11373688866422343, | |
| "grad_norm": 0.5527771711349487, | |
| "learning_rate": 9.43968789965467e-05, | |
| "loss": 1.588, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.1154601748561056, | |
| "grad_norm": 0.6176398396492004, | |
| "learning_rate": 9.413222969772906e-05, | |
| "loss": 1.5747, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.11718346104798778, | |
| "grad_norm": 0.589585542678833, | |
| "learning_rate": 9.386186254885783e-05, | |
| "loss": 1.5938, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.11890674723986995, | |
| "grad_norm": 0.5356398820877075, | |
| "learning_rate": 9.3585812579181e-05, | |
| "loss": 1.5822, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.12063003343175212, | |
| "grad_norm": 0.6897734999656677, | |
| "learning_rate": 9.330411555422285e-05, | |
| "loss": 1.5982, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.1223533196236343, | |
| "grad_norm": 0.5214916467666626, | |
| "learning_rate": 9.30168079711502e-05, | |
| "loss": 1.5898, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.12407660581551647, | |
| "grad_norm": 0.5990198850631714, | |
| "learning_rate": 9.272392705404372e-05, | |
| "loss": 1.6032, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.12579989200739863, | |
| "grad_norm": 0.531247079372406, | |
| "learning_rate": 9.242551074907519e-05, | |
| "loss": 1.6082, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.12752317819928083, | |
| "grad_norm": 0.607933521270752, | |
| "learning_rate": 9.212159771959101e-05, | |
| "loss": 1.5817, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.129246464391163, | |
| "grad_norm": 0.5464344024658203, | |
| "learning_rate": 9.181222734110301e-05, | |
| "loss": 1.5759, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.13096975058304516, | |
| "grad_norm": 0.6487947106361389, | |
| "learning_rate": 9.149743969618683e-05, | |
| "loss": 1.6067, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.13269303677492733, | |
| "grad_norm": 0.556429922580719, | |
| "learning_rate": 9.117727556928875e-05, | |
| "loss": 1.5863, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.1344163229668095, | |
| "grad_norm": 0.5772918462753296, | |
| "learning_rate": 9.085177644144167e-05, | |
| "loss": 1.5888, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.1361396091586917, | |
| "grad_norm": 0.5730582475662231, | |
| "learning_rate": 9.052098448489062e-05, | |
| "loss": 1.5983, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.13786289535057386, | |
| "grad_norm": 0.5221332311630249, | |
| "learning_rate": 9.018494255762894e-05, | |
| "loss": 1.5757, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.13958618154245603, | |
| "grad_norm": 0.5817165970802307, | |
| "learning_rate": 8.98436941978455e-05, | |
| "loss": 1.5813, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.1413094677343382, | |
| "grad_norm": 0.5672810673713684, | |
| "learning_rate": 8.949728361828381e-05, | |
| "loss": 1.5997, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.14303275392622036, | |
| "grad_norm": 0.5768831372261047, | |
| "learning_rate": 8.914575570051375e-05, | |
| "loss": 1.5707, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.14475604011810256, | |
| "grad_norm": 0.6032638549804688, | |
| "learning_rate": 8.878915598911664e-05, | |
| "loss": 1.5892, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.14647932630998473, | |
| "grad_norm": 0.5976369976997375, | |
| "learning_rate": 8.842753068578434e-05, | |
| "loss": 1.5996, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.1482026125018669, | |
| "grad_norm": 0.559442400932312, | |
| "learning_rate": 8.806092664333333e-05, | |
| "loss": 1.5813, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.14992589869374906, | |
| "grad_norm": 0.516207218170166, | |
| "learning_rate": 8.768939135963447e-05, | |
| "loss": 1.5742, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.15164918488563123, | |
| "grad_norm": 0.553333044052124, | |
| "learning_rate": 8.731297297145889e-05, | |
| "loss": 1.585, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.15337247107751342, | |
| "grad_norm": 0.5709084868431091, | |
| "learning_rate": 8.693172024824143e-05, | |
| "loss": 1.5811, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.1550957572693956, | |
| "grad_norm": 0.52576744556427, | |
| "learning_rate": 8.654568258576197e-05, | |
| "loss": 1.5843, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.1550957572693956, | |
| "eval_accuracy": 0.010426478228323498, | |
| "eval_loss": 1.732275128364563, | |
| "eval_runtime": 1695.9476, | |
| "eval_samples_per_second": 501.14, | |
| "eval_steps_per_second": 15.661, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.15681904346127776, | |
| "grad_norm": 0.5634833574295044, | |
| "learning_rate": 8.615490999974563e-05, | |
| "loss": 1.5927, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.15854232965315992, | |
| "grad_norm": 0.5738709568977356, | |
| "learning_rate": 8.575945311938262e-05, | |
| "loss": 1.6131, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.1602656158450421, | |
| "grad_norm": 0.5828307867050171, | |
| "learning_rate": 8.535936318076864e-05, | |
| "loss": 1.5766, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.1619889020369243, | |
| "grad_norm": 0.580729603767395, | |
| "learning_rate": 8.495469202026669e-05, | |
| "loss": 1.5902, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.16371218822880645, | |
| "grad_norm": 0.568894624710083, | |
| "learning_rate": 8.454549206779092e-05, | |
| "loss": 1.5671, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.16543547442068862, | |
| "grad_norm": 0.5564482808113098, | |
| "learning_rate": 8.413181634001391e-05, | |
| "loss": 1.5778, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.1671587606125708, | |
| "grad_norm": 0.5514076948165894, | |
| "learning_rate": 8.371371843349755e-05, | |
| "loss": 1.5874, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.16888204680445298, | |
| "grad_norm": 0.5865207314491272, | |
| "learning_rate": 8.329125251774916e-05, | |
| "loss": 1.5637, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.17060533299633515, | |
| "grad_norm": 0.5577490329742432, | |
| "learning_rate": 8.286447332820298e-05, | |
| "loss": 1.5801, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.17232861918821732, | |
| "grad_norm": 0.5502321124076843, | |
| "learning_rate": 8.243343615912877e-05, | |
| "loss": 1.5695, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.1740519053800995, | |
| "grad_norm": 0.5614681243896484, | |
| "learning_rate": 8.199819685646759e-05, | |
| "loss": 1.5892, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.17577519157198165, | |
| "grad_norm": 0.5736984014511108, | |
| "learning_rate": 8.155881181059644e-05, | |
| "loss": 1.5911, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.17749847776386385, | |
| "grad_norm": 0.49306830763816833, | |
| "learning_rate": 8.111533794902217e-05, | |
| "loss": 1.5481, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.17922176395574602, | |
| "grad_norm": 0.5843108892440796, | |
| "learning_rate": 8.066783272900586e-05, | |
| "loss": 1.582, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.18094505014762818, | |
| "grad_norm": 0.5754996538162231, | |
| "learning_rate": 8.02163541301185e-05, | |
| "loss": 1.5885, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.18266833633951035, | |
| "grad_norm": 0.5479796528816223, | |
| "learning_rate": 7.976096064672915e-05, | |
| "loss": 1.5693, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.18439162253139252, | |
| "grad_norm": 0.5987735390663147, | |
| "learning_rate": 7.930171128042627e-05, | |
| "loss": 1.5679, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.1861149087232747, | |
| "grad_norm": 0.5608052611351013, | |
| "learning_rate": 7.88386655323733e-05, | |
| "loss": 1.5559, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.18783819491515688, | |
| "grad_norm": 0.5474194288253784, | |
| "learning_rate": 7.83718833955997e-05, | |
| "loss": 1.5846, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.18956148110703905, | |
| "grad_norm": 0.5139473676681519, | |
| "learning_rate": 7.790142534722805e-05, | |
| "loss": 1.5715, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.19128476729892122, | |
| "grad_norm": 0.5175371170043945, | |
| "learning_rate": 7.742735234063859e-05, | |
| "loss": 1.5864, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.19300805349080338, | |
| "grad_norm": 0.5598956942558289, | |
| "learning_rate": 7.694972579757193e-05, | |
| "loss": 1.5373, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.19473133968268558, | |
| "grad_norm": 0.5191853642463684, | |
| "learning_rate": 7.646860760017124e-05, | |
| "loss": 1.5573, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.19645462587456775, | |
| "grad_norm": 0.5062898993492126, | |
| "learning_rate": 7.598406008296456e-05, | |
| "loss": 1.5797, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.1981779120664499, | |
| "grad_norm": 0.5880659222602844, | |
| "learning_rate": 7.549614602478872e-05, | |
| "loss": 1.558, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.19990119825833208, | |
| "grad_norm": 0.5346918702125549, | |
| "learning_rate": 7.500492864065559e-05, | |
| "loss": 1.562, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.20162448445021428, | |
| "grad_norm": 0.5520205497741699, | |
| "learning_rate": 7.451047157356182e-05, | |
| "loss": 1.5458, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.20334777064209644, | |
| "grad_norm": 0.5452098250389099, | |
| "learning_rate": 7.401283888624307e-05, | |
| "loss": 1.5783, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.2050710568339786, | |
| "grad_norm": 0.5486232042312622, | |
| "learning_rate": 7.351209505287412e-05, | |
| "loss": 1.5635, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.20679434302586078, | |
| "grad_norm": 0.5769017934799194, | |
| "learning_rate": 7.300830495071524e-05, | |
| "loss": 1.5473, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.20679434302586078, | |
| "eval_accuracy": 0.010505145107655028, | |
| "eval_loss": 1.7127723693847656, | |
| "eval_runtime": 1714.6588, | |
| "eval_samples_per_second": 495.671, | |
| "eval_steps_per_second": 15.49, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.20851762921774294, | |
| "grad_norm": 0.5299004912376404, | |
| "learning_rate": 7.250153385170675e-05, | |
| "loss": 1.5631, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.21024091540962514, | |
| "grad_norm": 0.6350430250167847, | |
| "learning_rate": 7.199184741401222e-05, | |
| "loss": 1.5484, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.2119642016015073, | |
| "grad_norm": 0.5689346790313721, | |
| "learning_rate": 7.147931167351162e-05, | |
| "loss": 1.5616, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.21368748779338947, | |
| "grad_norm": 0.5793879628181458, | |
| "learning_rate": 7.096399303524577e-05, | |
| "loss": 1.5496, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.21541077398527164, | |
| "grad_norm": 0.5497804284095764, | |
| "learning_rate": 7.044595826481253e-05, | |
| "loss": 1.5667, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.2171340601771538, | |
| "grad_norm": 0.5804843902587891, | |
| "learning_rate": 6.992527447971677e-05, | |
| "loss": 1.5586, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.218857346369036, | |
| "grad_norm": 0.5805226564407349, | |
| "learning_rate": 6.940200914067431e-05, | |
| "loss": 1.5428, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.22058063256091817, | |
| "grad_norm": 0.5112205743789673, | |
| "learning_rate": 6.887623004287182e-05, | |
| "loss": 1.5597, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.22230391875280034, | |
| "grad_norm": 0.5555017590522766, | |
| "learning_rate": 6.834800530718285e-05, | |
| "loss": 1.5349, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.2240272049446825, | |
| "grad_norm": 0.5393018126487732, | |
| "learning_rate": 6.781740337134229e-05, | |
| "loss": 1.5392, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.22575049113656467, | |
| "grad_norm": 0.515864372253418, | |
| "learning_rate": 6.728449298107919e-05, | |
| "loss": 1.5617, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.22747377732844687, | |
| "grad_norm": 0.5203471779823303, | |
| "learning_rate": 6.674934318121013e-05, | |
| "loss": 1.5492, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.22919706352032904, | |
| "grad_norm": 0.5489692091941833, | |
| "learning_rate": 6.621202330669354e-05, | |
| "loss": 1.544, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.2309203497122112, | |
| "grad_norm": 0.5596274137496948, | |
| "learning_rate": 6.567260297364659e-05, | |
| "loss": 1.5463, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.23264363590409337, | |
| "grad_norm": 0.5610251426696777, | |
| "learning_rate": 6.513115207032557e-05, | |
| "loss": 1.5802, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.23436692209597557, | |
| "grad_norm": 0.5264619588851929, | |
| "learning_rate": 6.458774074807107e-05, | |
| "loss": 1.5545, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.23609020828785773, | |
| "grad_norm": 0.5814141631126404, | |
| "learning_rate": 6.404243941221903e-05, | |
| "loss": 1.5521, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.2378134944797399, | |
| "grad_norm": 0.5240880846977234, | |
| "learning_rate": 6.349531871297896e-05, | |
| "loss": 1.5675, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.23953678067162207, | |
| "grad_norm": 0.4984951913356781, | |
| "learning_rate": 6.294644953628023e-05, | |
| "loss": 1.5559, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.24126006686350424, | |
| "grad_norm": 0.5289067029953003, | |
| "learning_rate": 6.239590299458814e-05, | |
| "loss": 1.5285, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.24298335305538643, | |
| "grad_norm": 0.5221706032752991, | |
| "learning_rate": 6.184375041769032e-05, | |
| "loss": 1.553, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.2447066392472686, | |
| "grad_norm": 0.5475857257843018, | |
| "learning_rate": 6.12900633434552e-05, | |
| "loss": 1.5675, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.24642992543915077, | |
| "grad_norm": 0.5271047353744507, | |
| "learning_rate": 6.0734913508563395e-05, | |
| "loss": 1.5487, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.24815321163103293, | |
| "grad_norm": 0.5180040001869202, | |
| "learning_rate": 6.0178372839213406e-05, | |
| "loss": 1.5281, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.2498764978229151, | |
| "grad_norm": 0.566608726978302, | |
| "learning_rate": 5.9620513441802714e-05, | |
| "loss": 1.5602, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.25159978401479727, | |
| "grad_norm": 0.5131779909133911, | |
| "learning_rate": 5.906140759358555e-05, | |
| "loss": 1.5111, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.25332307020667943, | |
| "grad_norm": 0.5626484751701355, | |
| "learning_rate": 5.85011277333085e-05, | |
| "loss": 1.5528, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.25504635639856166, | |
| "grad_norm": 0.550121545791626, | |
| "learning_rate": 5.793974645182526e-05, | |
| "loss": 1.5401, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.2567696425904438, | |
| "grad_norm": 0.5408352017402649, | |
| "learning_rate": 5.737733648269162e-05, | |
| "loss": 1.5437, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.258492928782326, | |
| "grad_norm": 0.5391642451286316, | |
| "learning_rate": 5.6813970692741945e-05, | |
| "loss": 1.5407, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.258492928782326, | |
| "eval_accuracy": 0.010407141521982707, | |
| "eval_loss": 1.6942352056503296, | |
| "eval_runtime": 1748.5708, | |
| "eval_samples_per_second": 486.058, | |
| "eval_steps_per_second": 15.19, | |
| "step": 15000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 30000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 3000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.041448973814661e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |