gbyuvd committed on
Commit
fefea19
·
verified ·
1 Parent(s): 4be8017

Update after 1st chunk of MTP+NTP

Browse files

time step loss eval_loss
2025-09-27 09:19:21 1635
2025-09-27 09:30:19 2043 3.2852
2025-09-27 09:31:41 2043 1.8569737672805786
2025-09-27 10:22:48 4086 1.9708
2025-09-27 10:24:01 4086 1.7518565654754639
2025-09-27 11:13:59 6129 1.7177
2025-09-27 11:15:10 6129 1.6441054344177246
2025-09-27 12:05:03 8172 1.5895
2025-09-27 12:06:12 8172 1.5314661264419556
2025-09-27 12:06:17 8175

Files changed (3) hide show
  1. model.safetensors +1 -1
  2. training_args.bin +1 -1
  3. training_config.json +2 -2
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea856ee37e10e9888b30ca275ae0d2779e3019e3ffe1d00582e00ba122827b83
3
  size 39437252
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e573ac4d012c965dc07b9628d259170f9b9a9db5af857695f3c49e43da83b39a
3
  size 39437252
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9346ee479ba8803e8106765d8dc2fd4d69c529c27ef5c57c8edfa586f1c372f3
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426475db7e68e7905ebadd505f46b81c0da444386a36fba5e9404dc848b3bf00
3
  size 5368
training_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "model_type": "EnhancedQwen3ForCausalLM",
3
  "num_future_tokens": 3,
4
  "horizon_loss_enabled": true,
5
  "mtp_head_enabled": true,
@@ -7,5 +7,5 @@
7
  "causal_lm_warmup",
8
  "mtp_horizon_training"
9
  ],
10
- "total_parameters": 9854851
11
  }
 
1
  {
2
+ "model_type": "ChemQ3MTPForCausalLM",
3
  "num_future_tokens": 3,
4
  "horizon_loss_enabled": true,
5
  "mtp_head_enabled": true,
 
7
  "causal_lm_warmup",
8
  "mtp_horizon_training"
9
  ],
10
+ "total_parameters": 9857155
11
  }