taguser committed · commit a5ef0f9 · verified · 1 parent: f62c61c

Add files using upload-large-folder tool

adapter_config.json CHANGED
@@ -24,13 +24,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "q_proj",
-    "gate_proj",
+    "down_proj",
     "k_proj",
+    "o_proj",
+    "up_proj",
     "v_proj",
-    "down_proj",
-    "o_proj"
+    "q_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e73c946a8c6976ed95b4be5f1694e188145b29d958a90d8551f9bb644f21b7bf
+oid sha256:47feda9f0da80a8169fb143b1852937819325618c06ae934508ede76d8a0bdc5
 size 275341720
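Only the Git LFS pointer changes here; the ~275 MB of adapter weights are stored out of band and addressed by their sha256. A minimal sketch for checking a locally downloaded copy against the new pointer (standard library only; the local file path is a placeholder):

```python
import hashlib
from pathlib import Path

# Values from the LFS pointer in this commit.
EXPECTED_OID = "47feda9f0da80a8169fb143b1852937819325618c06ae934508ede76d8a0bdc5"
EXPECTED_SIZE = 275341720

path = Path("adapter_model.safetensors")  # placeholder local path

digest = hashlib.sha256()
with path.open("rb") as fh:
    # Hash in 1 MiB chunks to avoid loading the whole file into memory.
    for chunk in iter(lambda: fh.read(1 << 20), b""):
        digest.update(chunk)

assert path.stat().st_size == EXPECTED_SIZE, "size mismatch vs. LFS pointer"
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch vs. LFS pointer"
print("adapter matches the LFS pointer in this commit")
```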
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 0.9904761904761905,
-    "total_flos": 1.1855717021096346e+17,
-    "train_loss": 0.5348628270320404,
-    "train_runtime": 803.1773,
-    "train_samples_per_second": 1.569,
-    "train_steps_per_second": 0.049
+    "epoch": 0.6666666666666666,
+    "total_flos": 5127839837847552.0,
+    "train_loss": 0.6043767929077148,
+    "train_runtime": 41.7151,
+    "train_samples_per_second": 1.151,
+    "train_steps_per_second": 0.024
 }
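The new metrics are consistent with a single optimizer step over the 48-example dataset recorded in card.json. A rough sanity check, assuming the usual Hugging Face Trainer definitions of these fields (the effective batch of 32 is taken from card.json's batch_size and is an assumption about how the 0.666... epoch arises, not something stated in this diff):

```python
train_runtime = 41.7151     # seconds, from the new train_results.json
dataset_size = 48           # "dataset_size" in card.json
global_steps = 1            # "global_step" / "max_steps" in trainer_state.json
effective_batch = 32        # "batch_size" in card.json (assumed global batch)

# Trainer reports throughput over the scheduled samples for the run:
print(round(dataset_size / train_runtime, 3))   # 1.151 -> train_samples_per_second
print(round(global_steps / train_runtime, 3))   # 0.024 -> train_steps_per_second

# One step over an effective batch of 32 out of 48 examples:
print(effective_batch / dataset_size)           # 0.666... -> "epoch"
```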
card.json CHANGED
@@ -1,7 +1,7 @@
 {
     "name": "openshift-builds-operator-epoch1-2025-Jun-08",
     "base_model": "Qwen/Qwen2.5-Coder-14B-Instruct" ,
-    "context_length": "3180",
+    "context_length": "4081",
     "model_type": "qwen",
     "quantized": "True",
     "finetune_steps": [
@@ -9,7 +9,7 @@
     "model_repo": "taguser/openshift-builds-operator-epoch1-2025-Jun-08",
     "base_model": "Qwen/Qwen2.5-Coder-14B-Instruct",
     "step": 2,
-    "data": "cia-tools/parsed_data",
+    "data": "cia-tools/smallds",
     "epochs": "1",
     "batch_size": "32",
     "dataset_size": "48",
checkpoint-1/adapter_config.json CHANGED
@@ -24,13 +24,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "q_proj",
-    "gate_proj",
+    "down_proj",
     "k_proj",
+    "o_proj",
+    "up_proj",
     "v_proj",
-    "down_proj",
-    "o_proj"
+    "q_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
checkpoint-1/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d6f54188f7668fc1d69b3fdec8ebb95d0891242b7736d6f52527318f441ec3e
+oid sha256:9cc2ca16d3bff49bcd53e33f7f58bfb39e819d8de87d1c5a9b3077b05b28c70e
 size 551070979
checkpoint-1/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f58a513cd0857709df493227d4bf4fe8df294021f339dc3c315675b6824b1543
+oid sha256:17f268ca93c88b4d82f442bd1280edcc0606fe9d52a665ccd6a004f3d3832534
 size 14917
checkpoint-1/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0070bb4cb00ec3dc3f9e4ced2f85698f7b32cdfbbf4bb2dc23198456a9e73896
+oid sha256:c690eabcc123561ee0f29f969ab9c61ebc9aaf6fbde86b41586c49dbfa7001b5
 size 14917
checkpoint-1/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6afcff6e27e60a2bfb38f5eb37f1e72b0551015bab2db232483e28c88d01030c
+oid sha256:d47405cf868a21e2c387f73023f8c9b17a7966fb2f86d736ea5850c65a0716f4
 size 1465
checkpoint-1/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.025396825396825397,
+  "epoch": 0.6666666666666666,
   "eval_steps": 500,
   "global_step": 1,
   "is_hyper_param_search": false,
@@ -10,7 +10,7 @@
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 5,
-  "max_steps": 39,
+  "max_steps": 1,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 1,
@@ -21,12 +21,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2916214342942720.0,
+  "total_flos": 5127839837847552.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
checkpoint-1/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df62c6b3c891ce82ecd4497b02ca5a9f9048b25b92bbe3c2d8dfe6ccf7d014e4
+oid sha256:0315bf318104bf2951c648b1327dd0c081b6b91b6d39aede83507e1003d3f279
 size 6097
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 0.9904761904761905,
-    "total_flos": 1.1855717021096346e+17,
-    "train_loss": 0.5348628270320404,
-    "train_runtime": 803.1773,
-    "train_samples_per_second": 1.569,
-    "train_steps_per_second": 0.049
+    "epoch": 0.6666666666666666,
+    "total_flos": 5127839837847552.0,
+    "train_loss": 0.6043767929077148,
+    "train_runtime": 41.7151,
+    "train_samples_per_second": 1.151,
+    "train_steps_per_second": 0.024
 }
trainer_log.jsonl CHANGED
@@ -1,8 +1 @@
-{"current_steps": 5, "total_steps": 39, "loss": 0.5952, "lr": 5e-05, "epoch": 0.12698412698412698, "percentage": 12.82, "elapsed_time": "0:02:55", "remaining_time": "0:19:50"}
-{"current_steps": 10, "total_steps": 39, "loss": 0.583, "lr": 4.752422169756048e-05, "epoch": 0.25396825396825395, "percentage": 25.64, "elapsed_time": "0:04:37", "remaining_time": "0:13:25"}
-{"current_steps": 15, "total_steps": 39, "loss": 0.5784, "lr": 4.058724504646834e-05, "epoch": 0.38095238095238093, "percentage": 38.46, "elapsed_time": "0:06:08", "remaining_time": "0:09:49"}
-{"current_steps": 20, "total_steps": 39, "loss": 0.5345, "lr": 3.056302334890786e-05, "epoch": 0.5079365079365079, "percentage": 51.28, "elapsed_time": "0:07:36", "remaining_time": "0:07:13"}
-{"current_steps": 25, "total_steps": 39, "loss": 0.4879, "lr": 1.9436976651092144e-05, "epoch": 0.6349206349206349, "percentage": 64.1, "elapsed_time": "0:09:02", "remaining_time": "0:05:03"}
-{"current_steps": 30, "total_steps": 39, "loss": 0.5065, "lr": 9.412754953531663e-06, "epoch": 0.7619047619047619, "percentage": 76.92, "elapsed_time": "0:10:30", "remaining_time": "0:03:09"}
-{"current_steps": 35, "total_steps": 39, "loss": 0.5044, "lr": 2.475778302439524e-06, "epoch": 0.8888888888888888, "percentage": 89.74, "elapsed_time": "0:12:05", "remaining_time": "0:01:22"}
-{"current_steps": 39, "total_steps": 39, "epoch": 0.9904761904761905, "percentage": 100.0, "elapsed_time": "0:13:20", "remaining_time": "0:00:00"}
+{"current_steps": 1, "total_steps": 1, "epoch": 0.6666666666666666, "percentage": 100.0, "elapsed_time": "0:00:39", "remaining_time": "0:00:00"}
trainer_state.json CHANGED
@@ -2,74 +2,25 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9904761904761905,
+  "epoch": 0.6666666666666666,
   "eval_steps": 500,
-  "global_step": 39,
+  "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.12698412698412698,
-      "grad_norm": 0.030378561466932297,
-      "learning_rate": 5e-05,
-      "loss": 0.5952,
-      "step": 5
-    },
-    {
-      "epoch": 0.25396825396825395,
-      "grad_norm": 0.05285210907459259,
-      "learning_rate": 4.752422169756048e-05,
-      "loss": 0.583,
-      "step": 10
-    },
-    {
-      "epoch": 0.38095238095238093,
-      "grad_norm": 0.04900702089071274,
-      "learning_rate": 4.058724504646834e-05,
-      "loss": 0.5784,
-      "step": 15
-    },
-    {
-      "epoch": 0.5079365079365079,
-      "grad_norm": 0.041186440736055374,
-      "learning_rate": 3.056302334890786e-05,
-      "loss": 0.5345,
-      "step": 20
-    },
-    {
-      "epoch": 0.6349206349206349,
-      "grad_norm": 0.04592454433441162,
-      "learning_rate": 1.9436976651092144e-05,
-      "loss": 0.4879,
-      "step": 25
-    },
-    {
-      "epoch": 0.7619047619047619,
-      "grad_norm": 0.051714979112148285,
-      "learning_rate": 9.412754953531663e-06,
-      "loss": 0.5065,
-      "step": 30
-    },
-    {
-      "epoch": 0.8888888888888888,
-      "grad_norm": 0.041197337210178375,
-      "learning_rate": 2.475778302439524e-06,
-      "loss": 0.5044,
-      "step": 35
-    },
-    {
-      "epoch": 0.9904761904761905,
-      "step": 39,
-      "total_flos": 1.1855717021096346e+17,
-      "train_loss": 0.5348628270320404,
-      "train_runtime": 803.1773,
-      "train_samples_per_second": 1.569,
-      "train_steps_per_second": 0.049
+      "epoch": 0.6666666666666666,
+      "step": 1,
+      "total_flos": 5127839837847552.0,
+      "train_loss": 0.6043767929077148,
+      "train_runtime": 41.7151,
+      "train_samples_per_second": 1.151,
+      "train_steps_per_second": 0.024
     }
   ],
   "logging_steps": 5,
-  "max_steps": 39,
+  "max_steps": 1,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 1,
@@ -85,7 +36,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.1855717021096346e+17,
+  "total_flos": 5127839837847552.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df62c6b3c891ce82ecd4497b02ca5a9f9048b25b92bbe3c2d8dfe6ccf7d014e4
+oid sha256:0315bf318104bf2951c648b1327dd0c081b6b91b6d39aede83507e1003d3f279
 size 6097