Gabe-Thomp committed on
Commit
615a5fc
·
verified ·
1 Parent(s): a2cb81e

End of training

Browse files
README.md CHANGED
@@ -1,17 +1,19 @@
1
  ---
2
  base_model: google/gemma-2-9b-it
 
3
  library_name: transformers
4
  model_name: gemma-sft-bayesian-lr2.0e-06_assistant_only
5
  tags:
6
  - generated_from_trainer
7
  - sft
8
  - trl
 
9
  licence: license
10
  ---
11
 
12
  # Model Card for gemma-sft-bayesian-lr2.0e-06_assistant_only
13
 
14
- This model is a fine-tuned version of [google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
1
  ---
2
  base_model: google/gemma-2-9b-it
3
+ datasets: Gabe-Thomp/gemma-bayesian-training
4
  library_name: transformers
5
  model_name: gemma-sft-bayesian-lr2.0e-06_assistant_only
6
  tags:
7
  - generated_from_trainer
8
  - sft
9
  - trl
10
+ - alignment-handbook
11
  licence: license
12
  ---
13
 
14
  # Model Card for gemma-sft-bayesian-lr2.0e-06_assistant_only
15
 
16
+ This model is a fine-tuned version of [google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it) on the [Gabe-Thomp/gemma-bayesian-training](https://huggingface.co/datasets/Gabe-Thomp/gemma-bayesian-training) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
all_results.json CHANGED
@@ -1,4 +1,9 @@
1
  {
 
 
 
 
 
2
  "total_flos": 42494027431936.0,
3
  "train_loss": 0.12046583356164026,
4
  "train_runtime": 4906.2287,
 
1
  {
2
+ "eval_loss": 0.10729417949914932,
3
+ "eval_runtime": 18.0449,
4
+ "eval_samples": 240,
5
+ "eval_samples_per_second": 13.3,
6
+ "eval_steps_per_second": 0.831,
7
  "total_flos": 42494027431936.0,
8
  "train_loss": 0.12046583356164026,
9
  "train_runtime": 4906.2287,
config.json CHANGED
@@ -72,6 +72,6 @@
72
  "sliding_window_size": 4096,
73
  "torch_dtype": "bfloat16",
74
  "transformers_version": "4.54.0",
75
- "use_cache": false,
76
  "vocab_size": 256000
77
  }
 
72
  "sliding_window_size": 4096,
73
  "torch_dtype": "bfloat16",
74
  "transformers_version": "4.54.0",
75
+ "use_cache": true,
76
  "vocab_size": 256000
77
  }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_loss": 0.10729417949914932,
3
+ "eval_runtime": 18.0449,
4
+ "eval_samples": 240,
5
+ "eval_samples_per_second": 13.3,
6
+ "eval_steps_per_second": 0.831
7
+ }
runs/Jul27_01-16-54_bobu-l40s-1.csail.mit.edu/events.out.tfevents.1753598605.bobu-l40s-1.csail.mit.edu.2571914.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44f713bd22a0f52cd6e11396b8f46e3355978137425e2aec6ab91a221424675
3
+ size 476