prithivMLmods committed on Jun 4

Commit

6f426c0

verified ·

1 Parent(s): 2c7417f

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

checkpoint-1000/config.json +38 -0
checkpoint-1000/generation_config.json +6 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/trainer_state.json +48 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-1500/config.json +38 -0
checkpoint-1500/generation_config.json +6 -0
checkpoint-1500/model.safetensors +3 -0
checkpoint-1500/optimizer.pt +3 -0
checkpoint-1500/rng_state.pth +3 -0
checkpoint-1500/scheduler.pt +3 -0
checkpoint-1500/trainer_state.json +55 -0
checkpoint-1500/training_args.bin +3 -0
checkpoint-2000/config.json +38 -0
checkpoint-2000/generation_config.json +6 -0
checkpoint-2000/model.safetensors +3 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/trainer_state.json +62 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-2500/config.json +38 -0
checkpoint-2500/generation_config.json +6 -0
checkpoint-2500/model.safetensors +3 -0
checkpoint-2500/optimizer.pt +3 -0
checkpoint-2500/rng_state.pth +3 -0
checkpoint-2500/scheduler.pt +3 -0
checkpoint-2500/trainer_state.json +69 -0
checkpoint-2500/training_args.bin +3 -0
checkpoint-3000/config.json +38 -0
checkpoint-3000/generation_config.json +6 -0
checkpoint-3000/model.safetensors +3 -0
checkpoint-3000/optimizer.pt +3 -0
checkpoint-3000/rng_state.pth +3 -0
checkpoint-3000/scheduler.pt +3 -0
checkpoint-3000/trainer_state.json +76 -0
checkpoint-3000/training_args.bin +3 -0
checkpoint-3500/config.json +38 -0
checkpoint-3500/generation_config.json +6 -0
checkpoint-3500/model.safetensors +3 -0
checkpoint-3500/optimizer.pt +3 -0
checkpoint-3500/rng_state.pth +3 -0
checkpoint-3500/scheduler.pt +3 -0
checkpoint-3500/trainer_state.json +83 -0
checkpoint-3500/training_args.bin +3 -0
checkpoint-4000/config.json +38 -0
checkpoint-4000/generation_config.json +6 -0

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-1000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.0"
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5f29d91c8113d1fccb3de5aa208450ca9977e3e73106b331b5b834c02c968e8
+size 497774208

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a85b0f11007e08c390566742a151267e2c29335761b97a4590b9f0cfd21bcb06
+size 995642298

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9de967e1901a1f83ee62c42218324ff0ef95543cf28963c868cc8805f356203
+size 14244

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5255b856e77ae07b5d665e31bcb5982e777d28e3803bba67e981282ca7f3efc2
+size 1064

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.10893246187363835,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.054466230936819175,
+      "grad_norm": 5.168296813964844,
+      "learning_rate": 5e-05,
+      "loss": 1.747,
+      "step": 500
+    },
+    {
+      "epoch": 0.10893246187363835,
+      "grad_norm": 0.6366938948631287,
+      "learning_rate": 4.711981566820277e-05,
+      "loss": 1.3516,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 9180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 261292032000000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbab60857fc9fddbc7593bfddee8002451e5c50b78b888987fa868bd9cabe9b8
+size 5304

checkpoint-1500/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-1500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.0"
+}

checkpoint-1500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d74bf00b6b6d893e15990c1f046404c7ef6d67a6b925f403e926e17ee52f3d67
+size 497774208

checkpoint-1500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96c11471d1a5066b299b467ba0fb98cb39e8254ee9b156fb8e603fa614931322
+size 995642298

checkpoint-1500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a52b643f4da984a3fc7ecc63f0d369cb90f514ea973ef64d8460181f91561448
+size 14244

checkpoint-1500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d056bf19d2ed63fb9824962abd8fe831ffa71f680bd984206b22c08319014dd3
+size 1064

checkpoint-1500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.16339869281045752,
+  "eval_steps": 500,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.054466230936819175,
+      "grad_norm": 5.168296813964844,
+      "learning_rate": 5e-05,
+      "loss": 1.747,
+      "step": 500
+    },
+    {
+      "epoch": 0.10893246187363835,
+      "grad_norm": 0.6366938948631287,
+      "learning_rate": 4.711981566820277e-05,
+      "loss": 1.3516,
+      "step": 1000
+    },
+    {
+      "epoch": 0.16339869281045752,
+      "grad_norm": 0.5679114460945129,
+      "learning_rate": 4.423963133640553e-05,
+      "loss": 1.3809,
+      "step": 1500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 9180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 391938048000000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbab60857fc9fddbc7593bfddee8002451e5c50b78b888987fa868bd9cabe9b8
+size 5304

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-2000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.0"
+}

checkpoint-2000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9fd5e730b214d372bd72f8c4be30bddff339a7478680626139ab19ca81437248
+size 497774208

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d86438c1f87fa6cafaf2248c39e16258c155c9dcc346201d2754a1a27b8fd4d
+size 995642298

checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21dab0a710a52b38589f7c68fcbe22f2f97319c3fc4c23b47e7f7f450adc99af
+size 14244

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e01b325926de808d22fd22f729777aed49c3f3e89abceff3fc429ae98b7ed6a5
+size 1064

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.2178649237472767,
+  "eval_steps": 500,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.054466230936819175,
+      "grad_norm": 5.168296813964844,
+      "learning_rate": 5e-05,
+      "loss": 1.747,
+      "step": 500
+    },
+    {
+      "epoch": 0.10893246187363835,
+      "grad_norm": 0.6366938948631287,
+      "learning_rate": 4.711981566820277e-05,
+      "loss": 1.3516,
+      "step": 1000
+    },
+    {
+      "epoch": 0.16339869281045752,
+      "grad_norm": 0.5679114460945129,
+      "learning_rate": 4.423963133640553e-05,
+      "loss": 1.3809,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2178649237472767,
+      "grad_norm": 1.4229077100753784,
+      "learning_rate": 4.13594470046083e-05,
+      "loss": 1.2784,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 9180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 522584064000000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbab60857fc9fddbc7593bfddee8002451e5c50b78b888987fa868bd9cabe9b8
+size 5304

checkpoint-2500/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-2500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.0"
+}

checkpoint-2500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bbe6089db78a6f9849494c806faaac7e44ac06167820943f239629c1da626ed7
+size 497774208

checkpoint-2500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0769540809050ac34b27b1f37217afd4bdbf73775c58507dd43b5d99cef1af0d
+size 995642298

checkpoint-2500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc456a1b551ff64da97202cd09b035aa3c7518ddb546dbc0ed234a924f9f9e0
+size 14244

checkpoint-2500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eaa960551ecb6751541543af4e08deb409de73233d127d35292337960fdabc3a
+size 1064

checkpoint-2500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.27233115468409586,
+  "eval_steps": 500,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.054466230936819175,
+      "grad_norm": 5.168296813964844,
+      "learning_rate": 5e-05,
+      "loss": 1.747,
+      "step": 500
+    },
+    {
+      "epoch": 0.10893246187363835,
+      "grad_norm": 0.6366938948631287,
+      "learning_rate": 4.711981566820277e-05,
+      "loss": 1.3516,
+      "step": 1000
+    },
+    {
+      "epoch": 0.16339869281045752,
+      "grad_norm": 0.5679114460945129,
+      "learning_rate": 4.423963133640553e-05,
+      "loss": 1.3809,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2178649237472767,
+      "grad_norm": 1.4229077100753784,
+      "learning_rate": 4.13594470046083e-05,
+      "loss": 1.2784,
+      "step": 2000
+    },
+    {
+      "epoch": 0.27233115468409586,
+      "grad_norm": 1.4274455308914185,
+      "learning_rate": 3.847926267281106e-05,
+      "loss": 1.2724,
+      "step": 2500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 9180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 653230080000000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbab60857fc9fddbc7593bfddee8002451e5c50b78b888987fa868bd9cabe9b8
+size 5304

checkpoint-3000/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-3000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.0"
+}

checkpoint-3000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:429179dc830847608cf995a0c189b5fe8432d388d05d5dbbcf1f73eeff8ae722
+size 497774208

checkpoint-3000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac766763cfe2d922875ecc4ca62a2aaa918557cd309b5c17307d9c85445db728
+size 995642298

checkpoint-3000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa1b086467d773850d77f5d1925d8b78501e5b7b4d87ddada3a4275e62b85377
+size 14244

checkpoint-3000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c7701509355468b66df64bc88943f8739c244356a7900b57d3fac69bdbc3a42
+size 1064

checkpoint-3000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.32679738562091504,
+  "eval_steps": 500,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.054466230936819175,
+      "grad_norm": 5.168296813964844,
+      "learning_rate": 5e-05,
+      "loss": 1.747,
+      "step": 500
+    },
+    {
+      "epoch": 0.10893246187363835,
+      "grad_norm": 0.6366938948631287,
+      "learning_rate": 4.711981566820277e-05,
+      "loss": 1.3516,
+      "step": 1000
+    },
+    {
+      "epoch": 0.16339869281045752,
+      "grad_norm": 0.5679114460945129,
+      "learning_rate": 4.423963133640553e-05,
+      "loss": 1.3809,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2178649237472767,
+      "grad_norm": 1.4229077100753784,
+      "learning_rate": 4.13594470046083e-05,
+      "loss": 1.2784,
+      "step": 2000
+    },
+    {
+      "epoch": 0.27233115468409586,
+      "grad_norm": 1.4274455308914185,
+      "learning_rate": 3.847926267281106e-05,
+      "loss": 1.2724,
+      "step": 2500
+    },
+    {
+      "epoch": 0.32679738562091504,
+      "grad_norm": 1.3789653778076172,
+      "learning_rate": 3.559907834101383e-05,
+      "loss": 1.2497,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 9180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 783876096000000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbab60857fc9fddbc7593bfddee8002451e5c50b78b888987fa868bd9cabe9b8
+size 5304

checkpoint-3500/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-3500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.0"
+}

checkpoint-3500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:720835076f3e65ea605ce1f4bbf0f0fb79caea39712d572ed547b44e045d3a23
+size 497774208

checkpoint-3500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c806075de53cbc330d879bc1ac17d2bda86f90e2489acf45d970003de31fcba4
+size 995642298

checkpoint-3500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c2b2a7ae2e95906148def4e9d2a8982d4192a6ed553f8db64418d4f9b312e4d
+size 14244

checkpoint-3500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d989adbe7b477a299784449defb3b7e0be2bffc7393bfb9e63c4c3f29a629396
+size 1064

checkpoint-3500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.3812636165577342,
+  "eval_steps": 500,
+  "global_step": 3500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.054466230936819175,
+      "grad_norm": 5.168296813964844,
+      "learning_rate": 5e-05,
+      "loss": 1.747,
+      "step": 500
+    },
+    {
+      "epoch": 0.10893246187363835,
+      "grad_norm": 0.6366938948631287,
+      "learning_rate": 4.711981566820277e-05,
+      "loss": 1.3516,
+      "step": 1000
+    },
+    {
+      "epoch": 0.16339869281045752,
+      "grad_norm": 0.5679114460945129,
+      "learning_rate": 4.423963133640553e-05,
+      "loss": 1.3809,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2178649237472767,
+      "grad_norm": 1.4229077100753784,
+      "learning_rate": 4.13594470046083e-05,
+      "loss": 1.2784,
+      "step": 2000
+    },
+    {
+      "epoch": 0.27233115468409586,
+      "grad_norm": 1.4274455308914185,
+      "learning_rate": 3.847926267281106e-05,
+      "loss": 1.2724,
+      "step": 2500
+    },
+    {
+      "epoch": 0.32679738562091504,
+      "grad_norm": 1.3789653778076172,
+      "learning_rate": 3.559907834101383e-05,
+      "loss": 1.2497,
+      "step": 3000
+    },
+    {
+      "epoch": 0.3812636165577342,
+      "grad_norm": 1.8324720859527588,
+      "learning_rate": 3.271889400921659e-05,
+      "loss": 1.2794,
+      "step": 3500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 9180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 914522112000000.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbab60857fc9fddbc7593bfddee8002451e5c50b78b888987fa868bd9cabe9b8
+size 5304

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-4000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.0"
+}