WinstonShum/lora_model_causal_llama_3.1
Files changed:
- README.md +3 -3
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- runs/Aug14_20-28-21_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667301.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-29-33_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667374.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-30-58_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667458.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-33-48_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667628.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-37-12_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667833.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- runs/Aug14_20-38-44_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667925.a100-4-40gb-1-6bce-head-dwq1ibfh-compute +3 -0
- training_args.bin +1 -1
    	
README.md CHANGED

@@ -35,14 +35,14 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate:
+- learning_rate: 5e-05
 - train_batch_size: 2
 - eval_batch_size: 8
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- lr_scheduler_warmup_steps:
-- num_epochs:
+- lr_scheduler_warmup_steps: 50
+- num_epochs: 3
 
 ### Training results
 
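For orientation, the new hyperparameter block maps one-to-one onto `transformers.TrainingArguments`. A minimal sketch, assuming the run was driven by the Hugging Face `Trainer` (the `output_dir` name is hypothetical; the Adam betas and epsilon listed above are the `Trainer` defaults):

```python
from transformers import TrainingArguments

# A sketch only: values mirror the new side of the README diff.
training_args = TrainingArguments(
    output_dir="outputs",             # hypothetical name, not in the diff
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=8,
    seed=3407,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    num_train_epochs=3,
    adam_beta1=0.9,                   # Adam betas/epsilon as listed above
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    logging_dir="runs",               # matches the runs/ event files added below
)
```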
    	
adapter_config.json CHANGED

@@ -14,7 +14,7 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 8,
-  "lora_dropout": 0.
+  "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
-    "v_proj",
+    "up_proj",
     "gate_proj",
+    "v_proj",
     "k_proj",
     "q_proj",
     "down_proj",
-    "
+    "o_proj"
   ],
   "task_type": null,
   "use_dora": false,
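The updated adapter settings correspond directly to `peft.LoraConfig`. A minimal sketch, taking the values from the new side of the diff; the LoRA rank `r` sits outside the visible hunks, so the value below is a placeholder:

```python
from peft import LoraConfig

# A sketch only: alpha, dropout, and target modules come from the new
# adapter_config.json; r is a placeholder, not visible in this diff.
lora_config = LoraConfig(
    r=16,  # placeholder: rank is not shown in the hunks above
    lora_alpha=8,
    lora_dropout=0.1,
    target_modules=[
        "up_proj", "gate_proj", "v_proj",
        "k_proj", "q_proj", "down_proj", "o_proj",
    ],
)
```

Targeting all seven attention and MLP projections is the usual full-coverage LoRA setup for Llama-style models; with standard LoRA, the effective update scale is `lora_alpha / r`.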
    	
adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2e82bedcb596a1a946480177dd7b024bc7767cefe5bdd14830d2e2815e7de20b
 size 167832240
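This file is a Git LFS pointer: only the spec version, the SHA-256 object id, and the byte size (167832240, roughly 168 MB) live in the repo, while the actual adapter weights are stored in LFS. Once pulled, the adapter attaches to its base model; a sketch, assuming a Llama 3.1 base (the exact base model id is inferred from the repo name, not recorded in this diff):

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Assumption: the base model id below is a guess from the repo name.
base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
model = PeftModel.from_pretrained(base, "WinstonShum/lora_model_causal_llama_3.1")
```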
    	
runs/Aug14_20-28-21_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667301.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8fa9c121214c40b2a4f10fd670859c7df23980142a72cb649e49cc1c2934847
+size 5661
    	
runs/Aug14_20-29-33_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667374.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10133d72e742f3585869600fc8524fe88d64889d1d4c549986dc2500b505eba5
+size 5661
    	
runs/Aug14_20-30-58_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667458.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6d782ed8214a698959b38049196c0da281c48944feb29949215cbb8fd1b4a87
+size 5661
    	
runs/Aug14_20-33-48_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667628.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70c677b24d64d3808c16eeaf8ffae199ddaa927bc46b07ee84ee2c717c8dabd8
+size 5661
    	
runs/Aug14_20-37-12_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667833.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63354ab73b1d4dfe7bb84b6b7dab4b0c69851cb5bbd1a40cd750b463a2ae46ab
+size 7938
    	
runs/Aug14_20-38-44_a100-4-40gb-1-6bce-head-dwq1ibfh-compute/events.out.tfevents.1723667925.a100-4-40gb-1-6bce-head-dwq1ibfh-compute ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1914e529337dd6890569b4abc710759dc5309b9bbbb6e971bb01a0346225f7ac
+size 665726
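The six tfevents files above are TensorBoard event logs, one per training start within roughly ten minutes on Aug 14 (Unix timestamps 1723667301 through 1723667925). The first five are only a few KB (5,661 to 7,938 bytes), consistent with short-lived restarts, while the last (665,726 bytes) is presumably the run that completed. After pulling the LFS objects they can be inspected with `tensorboard --logdir runs`.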
    	
training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:37184ae993c0c2a43a4cffa3c56a066dd11732254dc6188b79fb5f7079320dfb
 size 5176
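training_args.bin is the pickled `TrainingArguments` object that `Trainer` saves alongside checkpoints. A minimal sketch for inspecting it locally; newer PyTorch versions need `weights_only=False` because this is a full pickle rather than a tensor file, and a compatible transformers version must be installed for the unpickling to succeed:

```python
import torch

# Load the pickled TrainingArguments saved by Trainer and print a few
# of the fields that should match the README hyperparameters above.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.lr_scheduler_type, args.num_train_epochs)
```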