Update after 1st chunk of MTP+NTP (multi-token prediction + next-token prediction) training

| time | step | loss | eval_loss |
|---|---|---|---|
| 2025-09-27 09:19:21 | 1635 | | |
| 2025-09-27 09:30:19 | 2043 | 3.2852 | |
| 2025-09-27 09:31:41 | 2043 | | 1.8569737672805786 |
| 2025-09-27 10:22:48 | 4086 | 1.9708 | |
| 2025-09-27 10:24:01 | 4086 | | 1.7518565654754639 |
| 2025-09-27 11:13:59 | 6129 | 1.7177 | |
| 2025-09-27 11:15:10 | 6129 | | 1.6441054344177246 |
| 2025-09-27 12:05:03 | 8172 | 1.5895 | |
| 2025-09-27 12:06:12 | 8172 | | 1.5314661264419556 |
| 2025-09-27 12:06:17 | 8175 | | |

Files changed (3) hide show

model.safetensors +1 -1
training_args.bin +1 -1
training_config.json +2 -2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea856ee37e10e9888b30ca275ae0d2779e3019e3ffe1d00582e00ba122827b83
 size 39437252

 version https://git-lfs.github.com/spec/v1
+oid sha256:e573ac4d012c965dc07b9628d259170f9b9a9db5af857695f3c49e43da83b39a
 size 39437252

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9346ee479ba8803e8106765d8dc2fd4d69c529c27ef5c57c8edfa586f1c372f3
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:426475db7e68e7905ebadd505f46b81c0da444386a36fba5e9404dc848b3bf00
 size 5368

training_config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "model_type": "EnhancedQwen3ForCausalLM",
   "num_future_tokens": 3,
   "horizon_loss_enabled": true,
   "mtp_head_enabled": true,
@@ -7,5 +7,5 @@
     "causal_lm_warmup",
     "mtp_horizon_training"
   ],
-  "total_parameters": 9854851
 }

@@ -1,5 +1,5 @@
 {
+  "model_type": "ChemQ3MTPForCausalLM",
   "num_future_tokens": 3,
   "horizon_loss_enabled": true,
   "mtp_head_enabled": true,
@@ -7,5 +7,5 @@
     "causal_lm_warmup",
     "mtp_horizon_training"
   ],
+  "total_parameters": 9857155
 }