ivnle committed on
Commit
3ea4545
·
verified ·
1 Parent(s): 41df100

Upload folder using huggingface_hub

Browse files
text_ctx277_h0_lm/args.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "regime": "text",
3
+ "data_path": "data/training/splits_510k/train_arrow",
4
+ "output_dir": "outputs/production_text_ctx277_lm_20251125_003839",
5
+ "objective": "lm",
6
+ "val_data_path": "data/training/splits_510k/val_arrow",
7
+ "max_samples": null,
8
+ "vision_mode": "small",
9
+ "text_context_tokens": 277,
10
+ "hybrid_text_tokens": 0,
11
+ "vision_prompt": null,
12
+ "train_encoder": false,
13
+ "encoder_lr": 1e-05,
14
+ "compression_window_size": 9,
15
+ "compression_stride": 9,
16
+ "subsample_strategy": "regular",
17
+ "subsample_count": null,
18
+ "projection_dim": null,
19
+ "train_projection": false,
20
+ "compression_target": null,
21
+ "conv_kernel": 5,
22
+ "timestamp": "20251125_003839",
23
+ "batch_size": 12,
24
+ "gradient_accumulation_steps": 4,
25
+ "learning_rate": 0.0001,
26
+ "weight_decay": 0.01,
27
+ "num_epochs": 1,
28
+ "warmup_ratio": 0.1,
29
+ "max_grad_norm": 1.0,
30
+ "log_steps": 10,
31
+ "save_steps": 0,
32
+ "eval_steps": 2000,
33
+ "initial_validation": false,
34
+ "validation_only": false,
35
+ "no_checkpoints": false,
36
+ "num_qualitative_samples": 0,
37
+ "max_generation_tokens": 200,
38
+ "use_wandb": true,
39
+ "wandb_project": "vision-compression-2",
40
+ "wandb_run_name": "production_text_ctx277_lm_20251125_003839",
41
+ "resume_from_checkpoint": "outputs/production_text_ctx277_lm_20251125_003839/best_checkpoint.pt",
42
+ "resume": "outputs/production_text_ctx277_lm_20251125_003839/best_checkpoint.pt",
43
+ "init_from_checkpoint": null,
44
+ "allow_objective_switch": false,
45
+ "aux_loss_weight": 0.5,
46
+ "num_workers": 16,
47
+ "prefetch_factor": 4,
48
+ "seed": 42,
49
+ "eval_seed": 42,
50
+ "debug_log_sample_ids": false,
51
+ "device": "cuda",
52
+ "compile": false,
53
+ "compile_mode": "default",
54
+ "use_optimized_model": true,
55
+ "use_encoder_checkpointing": true,
56
+ "use_decoder_checkpointing": true,
57
+ "use_8bit_optimizer": true
58
+ }
text_ctx277_h0_lm/best_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65cb79961e47aeb2eea2482d430b8f9d66a4894346730f0966a8b1f6bb2f0201
3
+ size 11837130510
text_ctx277_h0_lm/checkpoint_log.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ 2025-11-25 05:34:49 | run_id: y619ou6b | checkpoint: best_checkpoint.pt | global_step: 2000 | val_loss: 1.7708
2
+ 2025-11-25 10:23:59 | run_id: y619ou6b | checkpoint: best_checkpoint.pt | global_step: 4000 | val_loss: 1.7096
3
+ 2025-11-25 15:15:18 | run_id: y619ou6b | checkpoint: best_checkpoint.pt | global_step: 6000 | val_loss: 1.6476
4
+ 2025-11-25 20:05:20 | run_id: y619ou6b | checkpoint: best_checkpoint.pt | global_step: 8000 | val_loss: 1.6169
5
+ 2025-11-27 01:39:36 | run_id: xjb0fgkh | checkpoint: best_checkpoint.pt | global_step: 10000 | val_loss: 1.6137
6
+ 2025-11-27 02:43:13 | run_id: xjb0fgkh | checkpoint: best_checkpoint.pt | global_step: 10417 | val_loss: 1.6137
text_ctx277_h0_lm/final_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65cb79961e47aeb2eea2482d430b8f9d66a4894346730f0966a8b1f6bb2f0201
3
+ size 11837130510
text_ctx277_h0_lm/train.log ADDED
The diff for this file is too large to render. See raw diff