Training in progress, step 1500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2718107304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a8513162caa5d3737291b9a77529b8bf201b097b574a72a4ec802346d071487
|
| 3 |
size 2718107304
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 145486330
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7930f19ab407a9573f12c17fd1b3af048e842e990b315cd3ef46705209aed468
|
| 3 |
size 145486330
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6d8c1e4a6e5d82bc88722704d97c55b34ef11ce759c09d1d12579f704419412
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5572383a3228bd80ae8f460d9587ee0e76e24dd65851719f3dadfa5ceb861f3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 1000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -715,6 +715,356 @@
|
|
| 715 |
"eval_samples_per_second": 9.647,
|
| 716 |
"eval_steps_per_second": 1.206,
|
| 717 |
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 718 |
}
|
| 719 |
],
|
| 720 |
"logging_steps": 10,
|
|
@@ -734,7 +1084,7 @@
|
|
| 734 |
"attributes": {}
|
| 735 |
}
|
| 736 |
},
|
| 737 |
-
"total_flos":
|
| 738 |
"train_batch_size": 4,
|
| 739 |
"trial_name": null,
|
| 740 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.2730748225013654,
|
| 5 |
"eval_steps": 1000,
|
| 6 |
+
"global_step": 1500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 715 |
"eval_samples_per_second": 9.647,
|
| 716 |
"eval_steps_per_second": 1.206,
|
| 717 |
"step": 1000
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"epoch": 0.1838703804842527,
|
| 721 |
+
"grad_norm": 8.284567832946777,
|
| 722 |
+
"learning_rate": 9.744772710182801e-05,
|
| 723 |
+
"loss": 2.899,
|
| 724 |
+
"step": 1010
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"epoch": 0.18569087930092845,
|
| 728 |
+
"grad_norm": 6.623629570007324,
|
| 729 |
+
"learning_rate": 9.734755975598777e-05,
|
| 730 |
+
"loss": 2.9371,
|
| 731 |
+
"step": 1020
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"epoch": 0.1875113781176042,
|
| 735 |
+
"grad_norm": 8.541956901550293,
|
| 736 |
+
"learning_rate": 9.724551796480459e-05,
|
| 737 |
+
"loss": 2.8807,
|
| 738 |
+
"step": 1030
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"epoch": 0.18933187693428,
|
| 742 |
+
"grad_norm": 8.620600700378418,
|
| 743 |
+
"learning_rate": 9.714160576801696e-05,
|
| 744 |
+
"loss": 2.8888,
|
| 745 |
+
"step": 1040
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 0.19115237575095576,
|
| 749 |
+
"grad_norm": 8.644622802734375,
|
| 750 |
+
"learning_rate": 9.7035827279411e-05,
|
| 751 |
+
"loss": 2.8747,
|
| 752 |
+
"step": 1050
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 0.19297287456763154,
|
| 756 |
+
"grad_norm": 9.656100273132324,
|
| 757 |
+
"learning_rate": 9.692818668665752e-05,
|
| 758 |
+
"loss": 2.9203,
|
| 759 |
+
"step": 1060
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"epoch": 0.1947933733843073,
|
| 763 |
+
"grad_norm": 10.529635429382324,
|
| 764 |
+
"learning_rate": 9.681868825114634e-05,
|
| 765 |
+
"loss": 2.9257,
|
| 766 |
+
"step": 1070
|
| 767 |
+
},
|
| 768 |
+
{
|
| 769 |
+
"epoch": 0.19661387220098306,
|
| 770 |
+
"grad_norm": 8.376754760742188,
|
| 771 |
+
"learning_rate": 9.670733630781747e-05,
|
| 772 |
+
"loss": 2.8864,
|
| 773 |
+
"step": 1080
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 0.19843437101765884,
|
| 777 |
+
"grad_norm": 8.018802642822266,
|
| 778 |
+
"learning_rate": 9.659413526498962e-05,
|
| 779 |
+
"loss": 2.8672,
|
| 780 |
+
"step": 1090
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 0.2002548698343346,
|
| 784 |
+
"grad_norm": 7.348598480224609,
|
| 785 |
+
"learning_rate": 9.647908960418553e-05,
|
| 786 |
+
"loss": 2.8528,
|
| 787 |
+
"step": 1100
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"epoch": 0.2020753686510104,
|
| 791 |
+
"grad_norm": 7.87021017074585,
|
| 792 |
+
"learning_rate": 9.636220387995469e-05,
|
| 793 |
+
"loss": 2.8713,
|
| 794 |
+
"step": 1110
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 0.20389586746768615,
|
| 798 |
+
"grad_norm": 8.476405143737793,
|
| 799 |
+
"learning_rate": 9.624348271969295e-05,
|
| 800 |
+
"loss": 2.8667,
|
| 801 |
+
"step": 1120
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 0.2057163662843619,
|
| 805 |
+
"grad_norm": 8.64283561706543,
|
| 806 |
+
"learning_rate": 9.612293082345931e-05,
|
| 807 |
+
"loss": 2.8523,
|
| 808 |
+
"step": 1130
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 0.2075368651010377,
|
| 812 |
+
"grad_norm": 10.11330795288086,
|
| 813 |
+
"learning_rate": 9.600055296378995e-05,
|
| 814 |
+
"loss": 2.8375,
|
| 815 |
+
"step": 1140
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 0.20935736391771345,
|
| 819 |
+
"grad_norm": 8.217743873596191,
|
| 820 |
+
"learning_rate": 9.58763539855092e-05,
|
| 821 |
+
"loss": 2.8685,
|
| 822 |
+
"step": 1150
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 0.2111778627343892,
|
| 826 |
+
"grad_norm": 7.501378536224365,
|
| 827 |
+
"learning_rate": 9.575033880553774e-05,
|
| 828 |
+
"loss": 2.8349,
|
| 829 |
+
"step": 1160
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 0.212998361551065,
|
| 833 |
+
"grad_norm": 8.812211036682129,
|
| 834 |
+
"learning_rate": 9.562251241269798e-05,
|
| 835 |
+
"loss": 2.8384,
|
| 836 |
+
"step": 1170
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 0.21481886036774075,
|
| 840 |
+
"grad_norm": 7.964756011962891,
|
| 841 |
+
"learning_rate": 9.549287986751655e-05,
|
| 842 |
+
"loss": 2.8653,
|
| 843 |
+
"step": 1180
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"epoch": 0.21663935918441654,
|
| 847 |
+
"grad_norm": 7.216350555419922,
|
| 848 |
+
"learning_rate": 9.536144630202395e-05,
|
| 849 |
+
"loss": 2.8276,
|
| 850 |
+
"step": 1190
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 0.2184598580010923,
|
| 854 |
+
"grad_norm": 7.890927314758301,
|
| 855 |
+
"learning_rate": 9.522821691955135e-05,
|
| 856 |
+
"loss": 2.7802,
|
| 857 |
+
"step": 1200
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 0.22028035681776806,
|
| 861 |
+
"grad_norm": 8.259157180786133,
|
| 862 |
+
"learning_rate": 9.509319699452469e-05,
|
| 863 |
+
"loss": 2.8407,
|
| 864 |
+
"step": 1210
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 0.22210085563444384,
|
| 868 |
+
"grad_norm": 7.810998916625977,
|
| 869 |
+
"learning_rate": 9.495639187225575e-05,
|
| 870 |
+
"loss": 2.8374,
|
| 871 |
+
"step": 1220
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 0.2239213544511196,
|
| 875 |
+
"grad_norm": 6.905944347381592,
|
| 876 |
+
"learning_rate": 9.481780696873059e-05,
|
| 877 |
+
"loss": 2.8342,
|
| 878 |
+
"step": 1230
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 0.2257418532677954,
|
| 882 |
+
"grad_norm": 8.832979202270508,
|
| 883 |
+
"learning_rate": 9.467744777039517e-05,
|
| 884 |
+
"loss": 2.7816,
|
| 885 |
+
"step": 1240
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 0.22756235208447115,
|
| 889 |
+
"grad_norm": 6.949944972991943,
|
| 890 |
+
"learning_rate": 9.453531983393809e-05,
|
| 891 |
+
"loss": 2.8104,
|
| 892 |
+
"step": 1250
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 0.2293828509011469,
|
| 896 |
+
"grad_norm": 11.183205604553223,
|
| 897 |
+
"learning_rate": 9.439142878607061e-05,
|
| 898 |
+
"loss": 2.8605,
|
| 899 |
+
"step": 1260
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 0.2312033497178227,
|
| 903 |
+
"grad_norm": 8.672426223754883,
|
| 904 |
+
"learning_rate": 9.424578032330398e-05,
|
| 905 |
+
"loss": 2.7866,
|
| 906 |
+
"step": 1270
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 0.23302384853449845,
|
| 910 |
+
"grad_norm": 8.570023536682129,
|
| 911 |
+
"learning_rate": 9.409838021172375e-05,
|
| 912 |
+
"loss": 2.7814,
|
| 913 |
+
"step": 1280
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 0.2348443473511742,
|
| 917 |
+
"grad_norm": 17.605865478515625,
|
| 918 |
+
"learning_rate": 9.394923428676168e-05,
|
| 919 |
+
"loss": 2.8896,
|
| 920 |
+
"step": 1290
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 0.23666484616785,
|
| 924 |
+
"grad_norm": 8.613877296447754,
|
| 925 |
+
"learning_rate": 9.379834845296463e-05,
|
| 926 |
+
"loss": 2.8474,
|
| 927 |
+
"step": 1300
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 0.23848534498452575,
|
| 931 |
+
"grad_norm": 9.39710807800293,
|
| 932 |
+
"learning_rate": 9.364572868376075e-05,
|
| 933 |
+
"loss": 2.7771,
|
| 934 |
+
"step": 1310
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 0.24030584380120154,
|
| 938 |
+
"grad_norm": 12.333969116210938,
|
| 939 |
+
"learning_rate": 9.349138102122316e-05,
|
| 940 |
+
"loss": 2.8079,
|
| 941 |
+
"step": 1320
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 0.2421263426178773,
|
| 945 |
+
"grad_norm": 10.491060256958008,
|
| 946 |
+
"learning_rate": 9.333531157583055e-05,
|
| 947 |
+
"loss": 2.7536,
|
| 948 |
+
"step": 1330
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 0.24394684143455306,
|
| 952 |
+
"grad_norm": 9.862618446350098,
|
| 953 |
+
"learning_rate": 9.317752652622547e-05,
|
| 954 |
+
"loss": 2.8011,
|
| 955 |
+
"step": 1340
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 0.24576734025122884,
|
| 959 |
+
"grad_norm": 11.95722484588623,
|
| 960 |
+
"learning_rate": 9.301803211896955e-05,
|
| 961 |
+
"loss": 2.8058,
|
| 962 |
+
"step": 1350
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 0.2475878390679046,
|
| 966 |
+
"grad_norm": 8.709095001220703,
|
| 967 |
+
"learning_rate": 9.28568346682963e-05,
|
| 968 |
+
"loss": 2.7922,
|
| 969 |
+
"step": 1360
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 0.2494083378845804,
|
| 973 |
+
"grad_norm": 6.32808256149292,
|
| 974 |
+
"learning_rate": 9.269394055586116e-05,
|
| 975 |
+
"loss": 2.7246,
|
| 976 |
+
"step": 1370
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 0.2512288367012561,
|
| 980 |
+
"grad_norm": 10.615900039672852,
|
| 981 |
+
"learning_rate": 9.252935623048875e-05,
|
| 982 |
+
"loss": 2.7993,
|
| 983 |
+
"step": 1380
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 0.2530493355179319,
|
| 987 |
+
"grad_norm": 10.374322891235352,
|
| 988 |
+
"learning_rate": 9.236308820791768e-05,
|
| 989 |
+
"loss": 2.7583,
|
| 990 |
+
"step": 1390
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 0.2548698343346077,
|
| 994 |
+
"grad_norm": 11.486263275146484,
|
| 995 |
+
"learning_rate": 9.219514307054251e-05,
|
| 996 |
+
"loss": 2.8258,
|
| 997 |
+
"step": 1400
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 0.2566903331512835,
|
| 1001 |
+
"grad_norm": 9.840982437133789,
|
| 1002 |
+
"learning_rate": 9.202552746715322e-05,
|
| 1003 |
+
"loss": 2.8464,
|
| 1004 |
+
"step": 1410
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 0.2585108319679592,
|
| 1008 |
+
"grad_norm": 15.894274711608887,
|
| 1009 |
+
"learning_rate": 9.185424811267199e-05,
|
| 1010 |
+
"loss": 2.8465,
|
| 1011 |
+
"step": 1420
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 0.260331330784635,
|
| 1015 |
+
"grad_norm": 8.428662300109863,
|
| 1016 |
+
"learning_rate": 9.168131178788726e-05,
|
| 1017 |
+
"loss": 2.8095,
|
| 1018 |
+
"step": 1430
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 0.2621518296013108,
|
| 1022 |
+
"grad_norm": 17.082258224487305,
|
| 1023 |
+
"learning_rate": 9.150672533918544e-05,
|
| 1024 |
+
"loss": 2.7782,
|
| 1025 |
+
"step": 1440
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 0.2639723284179865,
|
| 1029 |
+
"grad_norm": 7.154361724853516,
|
| 1030 |
+
"learning_rate": 9.133049567827982e-05,
|
| 1031 |
+
"loss": 2.7773,
|
| 1032 |
+
"step": 1450
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 0.2657928272346623,
|
| 1036 |
+
"grad_norm": 6.119648456573486,
|
| 1037 |
+
"learning_rate": 9.115262978193679e-05,
|
| 1038 |
+
"loss": 2.7788,
|
| 1039 |
+
"step": 1460
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 0.2676133260513381,
|
| 1043 |
+
"grad_norm": 8.635058403015137,
|
| 1044 |
+
"learning_rate": 9.097313469169988e-05,
|
| 1045 |
+
"loss": 2.7703,
|
| 1046 |
+
"step": 1470
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 0.2694338248680138,
|
| 1050 |
+
"grad_norm": 12.325600624084473,
|
| 1051 |
+
"learning_rate": 9.079201751361082e-05,
|
| 1052 |
+
"loss": 2.7313,
|
| 1053 |
+
"step": 1480
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 0.2712543236846896,
|
| 1057 |
+
"grad_norm": 8.181892395019531,
|
| 1058 |
+
"learning_rate": 9.06092854179283e-05,
|
| 1059 |
+
"loss": 2.7795,
|
| 1060 |
+
"step": 1490
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 0.2730748225013654,
|
| 1064 |
+
"grad_norm": 14.719033241271973,
|
| 1065 |
+
"learning_rate": 9.042494563884404e-05,
|
| 1066 |
+
"loss": 2.8108,
|
| 1067 |
+
"step": 1500
|
| 1068 |
}
|
| 1069 |
],
|
| 1070 |
"logging_steps": 10,
|
|
|
|
| 1084 |
"attributes": {}
|
| 1085 |
}
|
| 1086 |
},
|
| 1087 |
+
"total_flos": 3.50721698955264e+17,
|
| 1088 |
"train_batch_size": 4,
|
| 1089 |
"trial_name": null,
|
| 1090 |
"trial_params": null
|