TitleOS
/

GalacticReasoning-1.3b

@@ -1,478 +0,0 @@
-{
-  "best_global_step": 600,
-  "best_metric": 1.4698271751403809,
-  "best_model_checkpoint": "output\\galactic_reasoning_1b\\checkpoint-600",
-  "epoch": 0.06,
-  "eval_steps": 200,
-  "global_step": 600,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.001,
-      "grad_norm": 0.06737130880355835,
-      "learning_rate": 9e-06,
-      "loss": 1.9371,
-      "step": 10
-    },
-    {
-      "epoch": 0.002,
-      "grad_norm": 0.07842425256967545,
-      "learning_rate": 1.9e-05,
-      "loss": 1.9399,
-      "step": 20
-    },
-    {
-      "epoch": 0.003,
-      "grad_norm": 0.10731378942728043,
-      "learning_rate": 2.9e-05,
-      "loss": 1.9374,
-      "step": 30
-    },
-    {
-      "epoch": 0.004,
-      "grad_norm": 0.12309419363737106,
-      "learning_rate": 3.9000000000000006e-05,
-      "loss": 1.9112,
-      "step": 40
-    },
-    {
-      "epoch": 0.005,
-      "grad_norm": 0.11017554998397827,
-      "learning_rate": 4.9e-05,
-      "loss": 1.871,
-      "step": 50
-    },
-    {
-      "epoch": 0.006,
-      "grad_norm": 0.16252341866493225,
-      "learning_rate": 5.9e-05,
-      "loss": 1.8408,
-      "step": 60
-    },
-    {
-      "epoch": 0.007,
-      "grad_norm": 0.1429395228624344,
-      "learning_rate": 6.9e-05,
-      "loss": 1.8236,
-      "step": 70
-    },
-    {
-      "epoch": 0.008,
-      "grad_norm": 0.15082544088363647,
-      "learning_rate": 7.900000000000001e-05,
-      "loss": 1.7789,
-      "step": 80
-    },
-    {
-      "epoch": 0.009,
-      "grad_norm": 0.16160228848457336,
-      "learning_rate": 8.900000000000001e-05,
-      "loss": 1.744,
-      "step": 90
-    },
-    {
-      "epoch": 0.01,
-      "grad_norm": 0.17471027374267578,
-      "learning_rate": 9.900000000000001e-05,
-      "loss": 1.7349,
-      "step": 100
-    },
-    {
-      "epoch": 0.011,
-      "grad_norm": 0.16684666275978088,
-      "learning_rate": 9.990909090909092e-05,
-      "loss": 1.6772,
-      "step": 110
-    },
-    {
-      "epoch": 0.012,
-      "grad_norm": 0.1762007474899292,
-      "learning_rate": 9.980808080808081e-05,
-      "loss": 1.6622,
-      "step": 120
-    },
-    {
-      "epoch": 0.013,
-      "grad_norm": 0.17090925574302673,
-      "learning_rate": 9.970707070707072e-05,
-      "loss": 1.6644,
-      "step": 130
-    },
-    {
-      "epoch": 0.014,
-      "grad_norm": 0.18479526042938232,
-      "learning_rate": 9.960606060606061e-05,
-      "loss": 1.642,
-      "step": 140
-    },
-    {
-      "epoch": 0.015,
-      "grad_norm": 0.18685005605220795,
-      "learning_rate": 9.950505050505052e-05,
-      "loss": 1.6376,
-      "step": 150
-    },
-    {
-      "epoch": 0.016,
-      "grad_norm": 0.18370380997657776,
-      "learning_rate": 9.940404040404041e-05,
-      "loss": 1.6296,
-      "step": 160
-    },
-    {
-      "epoch": 0.017,
-      "grad_norm": 0.20909732580184937,
-      "learning_rate": 9.93030303030303e-05,
-      "loss": 1.6176,
-      "step": 170
-    },
-    {
-      "epoch": 0.018,
-      "grad_norm": 0.20230641961097717,
-      "learning_rate": 9.920202020202021e-05,
-      "loss": 1.6112,
-      "step": 180
-    },
-    {
-      "epoch": 0.019,
-      "grad_norm": 0.21034330129623413,
-      "learning_rate": 9.91010101010101e-05,
-      "loss": 1.6239,
-      "step": 190
-    },
-    {
-      "epoch": 0.02,
-      "grad_norm": 0.19535459578037262,
-      "learning_rate": 9.900000000000001e-05,
-      "loss": 1.6168,
-      "step": 200
-    },
-    {
-      "epoch": 0.02,
-      "eval_loss": 1.5481634140014648,
-      "eval_runtime": 2829.9217,
-      "eval_samples_per_second": 0.353,
-      "eval_steps_per_second": 0.044,
-      "step": 200
-    },
-    {
-      "epoch": 0.021,
-      "grad_norm": 0.20927906036376953,
-      "learning_rate": 9.88989898989899e-05,
-      "loss": 1.6216,
-      "step": 210
-    },
-    {
-      "epoch": 0.022,
-      "grad_norm": 0.21300119161605835,
-      "learning_rate": 9.87979797979798e-05,
-      "loss": 1.5735,
-      "step": 220
-    },
-    {
-      "epoch": 0.023,
-      "grad_norm": 0.23267202079296112,
-      "learning_rate": 9.86969696969697e-05,
-      "loss": 1.5988,
-      "step": 230
-    },
-    {
-      "epoch": 0.024,
-      "grad_norm": 0.204238161444664,
-      "learning_rate": 9.859595959595959e-05,
-      "loss": 1.5656,
-      "step": 240
-    },
-    {
-      "epoch": 0.025,
-      "grad_norm": 0.20555658638477325,
-      "learning_rate": 9.84949494949495e-05,
-      "loss": 1.5992,
-      "step": 250
-    },
-    {
-      "epoch": 0.026,
-      "grad_norm": 0.19925552606582642,
-      "learning_rate": 9.839393939393939e-05,
-      "loss": 1.6159,
-      "step": 260
-    },
-    {
-      "epoch": 0.027,
-      "grad_norm": 0.23314008116722107,
-      "learning_rate": 9.82929292929293e-05,
-      "loss": 1.604,
-      "step": 270
-    },
-    {
-      "epoch": 0.028,
-      "grad_norm": 0.22813379764556885,
-      "learning_rate": 9.81919191919192e-05,
-      "loss": 1.5875,
-      "step": 280
-    },
-    {
-      "epoch": 0.029,
-      "grad_norm": 0.22509580850601196,
-      "learning_rate": 9.80909090909091e-05,
-      "loss": 1.5872,
-      "step": 290
-    },
-    {
-      "epoch": 0.03,
-      "grad_norm": 0.234571173787117,
-      "learning_rate": 9.7989898989899e-05,
-      "loss": 1.5947,
-      "step": 300
-    },
-    {
-      "epoch": 0.031,
-      "grad_norm": 0.22278717160224915,
-      "learning_rate": 9.78888888888889e-05,
-      "loss": 1.5646,
-      "step": 310
-    },
-    {
-      "epoch": 0.032,
-      "grad_norm": 0.21856215596199036,
-      "learning_rate": 9.77878787878788e-05,
-      "loss": 1.5572,
-      "step": 320
-    },
-    {
-      "epoch": 0.033,
-      "grad_norm": 0.2208547592163086,
-      "learning_rate": 9.76868686868687e-05,
-      "loss": 1.5826,
-      "step": 330
-    },
-    {
-      "epoch": 0.034,
-      "grad_norm": 0.2255655825138092,
-      "learning_rate": 9.758585858585859e-05,
-      "loss": 1.5816,
-      "step": 340
-    },
-    {
-      "epoch": 0.035,
-      "grad_norm": 0.22580045461654663,
-      "learning_rate": 9.748484848484849e-05,
-      "loss": 1.5714,
-      "step": 350
-    },
-    {
-      "epoch": 0.036,
-      "grad_norm": 0.2218249887228012,
-      "learning_rate": 9.738383838383839e-05,
-      "loss": 1.5463,
-      "step": 360
-    },
-    {
-      "epoch": 0.037,
-      "grad_norm": 0.22902308404445648,
-      "learning_rate": 9.728282828282829e-05,
-      "loss": 1.5746,
-      "step": 370
-    },
-    {
-      "epoch": 0.038,
-      "grad_norm": 0.21486122906208038,
-      "learning_rate": 9.718181818181818e-05,
-      "loss": 1.546,
-      "step": 380
-    },
-    {
-      "epoch": 0.039,
-      "grad_norm": 0.21602007746696472,
-      "learning_rate": 9.708080808080808e-05,
-      "loss": 1.567,
-      "step": 390
-    },
-    {
-      "epoch": 0.04,
-      "grad_norm": 0.2287064790725708,
-      "learning_rate": 9.697979797979798e-05,
-      "loss": 1.5585,
-      "step": 400
-    },
-    {
-      "epoch": 0.04,
-      "eval_loss": 1.498089075088501,
-      "eval_runtime": 2796.1826,
-      "eval_samples_per_second": 0.358,
-      "eval_steps_per_second": 0.045,
-      "step": 400
-    },
-    {
-      "epoch": 0.041,
-      "grad_norm": 0.24416211247444153,
-      "learning_rate": 9.687878787878788e-05,
-      "loss": 1.5738,
-      "step": 410
-    },
-    {
-      "epoch": 0.042,
-      "grad_norm": 0.2305736094713211,
-      "learning_rate": 9.677777777777778e-05,
-      "loss": 1.5628,
-      "step": 420
-    },
-    {
-      "epoch": 0.043,
-      "grad_norm": 0.22666579484939575,
-      "learning_rate": 9.667676767676768e-05,
-      "loss": 1.5457,
-      "step": 430
-    },
-    {
-      "epoch": 0.044,
-      "grad_norm": 0.24341981112957,
-      "learning_rate": 9.657575757575758e-05,
-      "loss": 1.5488,
-      "step": 440
-    },
-    {
-      "epoch": 0.045,
-      "grad_norm": 0.250980406999588,
-      "learning_rate": 9.647474747474749e-05,
-      "loss": 1.5539,
-      "step": 450
-    },
-    {
-      "epoch": 0.046,
-      "grad_norm": 0.24837073683738708,
-      "learning_rate": 9.637373737373738e-05,
-      "loss": 1.5556,
-      "step": 460
-    },
-    {
-      "epoch": 0.047,
-      "grad_norm": 0.24370302259922028,
-      "learning_rate": 9.627272727272729e-05,
-      "loss": 1.5351,
-      "step": 470
-    },
-    {
-      "epoch": 0.048,
-      "grad_norm": 0.2494235634803772,
-      "learning_rate": 9.617171717171718e-05,
-      "loss": 1.5729,
-      "step": 480
-    },
-    {
-      "epoch": 0.049,
-      "grad_norm": 0.2377675473690033,
-      "learning_rate": 9.607070707070707e-05,
-      "loss": 1.5694,
-      "step": 490
-    },
-    {
-      "epoch": 0.05,
-      "grad_norm": 0.23179057240486145,
-      "learning_rate": 9.596969696969698e-05,
-      "loss": 1.5497,
-      "step": 500
-    },
-    {
-      "epoch": 0.051,
-      "grad_norm": 0.24259260296821594,
-      "learning_rate": 9.586868686868687e-05,
-      "loss": 1.538,
-      "step": 510
-    },
-    {
-      "epoch": 0.052,
-      "grad_norm": 0.22587579488754272,
-      "learning_rate": 9.576767676767678e-05,
-      "loss": 1.5477,
-      "step": 520
-    },
-    {
-      "epoch": 0.053,
-      "grad_norm": 0.2444010227918625,
-      "learning_rate": 9.566666666666667e-05,
-      "loss": 1.5616,
-      "step": 530
-    },
-    {
-      "epoch": 0.054,
-      "grad_norm": 0.2342791110277176,
-      "learning_rate": 9.556565656565656e-05,
-      "loss": 1.5338,
-      "step": 540
-    },
-    {
-      "epoch": 0.055,
-      "grad_norm": 0.22708754241466522,
-      "learning_rate": 9.546464646464647e-05,
-      "loss": 1.5249,
-      "step": 550
-    },
-    {
-      "epoch": 0.056,
-      "grad_norm": 0.22712530195713043,
-      "learning_rate": 9.536363636363636e-05,
-      "loss": 1.5264,
-      "step": 560
-    },
-    {
-      "epoch": 0.057,
-      "grad_norm": 0.24947647750377655,
-      "learning_rate": 9.526262626262627e-05,
-      "loss": 1.5329,
-      "step": 570
-    },
-    {
-      "epoch": 0.058,
-      "grad_norm": 0.24310064315795898,
-      "learning_rate": 9.516161616161616e-05,
-      "loss": 1.5239,
-      "step": 580
-    },
-    {
-      "epoch": 0.059,
-      "grad_norm": 0.23869813978672028,
-      "learning_rate": 9.506060606060607e-05,
-      "loss": 1.5123,
-      "step": 590
-    },
-    {
-      "epoch": 0.06,
-      "grad_norm": 0.24522405862808228,
-      "learning_rate": 9.495959595959596e-05,
-      "loss": 1.5214,
-      "step": 600
-    },
-    {
-      "epoch": 0.06,
-      "eval_loss": 1.4698271751403809,
-      "eval_runtime": 2801.7352,
-      "eval_samples_per_second": 0.357,
-      "eval_steps_per_second": 0.045,
-      "step": 600
-    }
-  ],
-  "logging_steps": 10,
-  "max_steps": 10000,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 9223372036854775807,
-  "save_steps": 200,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1.3364964130445722e+17,
-  "train_batch_size": 4,
-  "trial_name": null,
-  "trial_params": null
-}