Update after 1st chunk of MTP+NTP
Browse files
time step loss eval_loss
2025-09-27 09:19:21 1635 - -
2025-09-27 09:30:19 2043 3.2852 -
2025-09-27 09:31:41 2043 - 1.8569737672805786
2025-09-27 10:22:48 4086 1.9708 -
2025-09-27 10:24:01 4086 - 1.7518565654754639
2025-09-27 11:13:59 6129 1.7177 -
2025-09-27 11:15:10 6129 - 1.6441054344177246
2025-09-27 12:05:03 8172 1.5895 -
2025-09-27 12:06:12 8172 - 1.5314661264419556
2025-09-27 12:06:17 8175 - -
- model.safetensors +1 -1
- training_args.bin +1 -1
- training_config.json +2 -2
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 39437252
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e573ac4d012c965dc07b9628d259170f9b9a9db5af857695f3c49e43da83b39a
|
| 3 |
size 39437252
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5368
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:426475db7e68e7905ebadd505f46b81c0da444386a36fba5e9404dc848b3bf00
|
| 3 |
size 5368
|
training_config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"model_type": "
|
| 3 |
"num_future_tokens": 3,
|
| 4 |
"horizon_loss_enabled": true,
|
| 5 |
"mtp_head_enabled": true,
|
|
@@ -7,5 +7,5 @@
|
|
| 7 |
"causal_lm_warmup",
|
| 8 |
"mtp_horizon_training"
|
| 9 |
],
|
| 10 |
-
"total_parameters":
|
| 11 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"model_type": "ChemQ3MTPForCausalLM",
|
| 3 |
"num_future_tokens": 3,
|
| 4 |
"horizon_loss_enabled": true,
|
| 5 |
"mtp_head_enabled": true,
|
|
|
|
| 7 |
"causal_lm_warmup",
|
| 8 |
"mtp_horizon_training"
|
| 9 |
],
|
| 10 |
+
"total_parameters": 9857155
|
| 11 |
}
|