llama3 r4 p2 results

Changed files:
- .gitattributes +1 -0
- llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml +7 -6
- llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml +7 -6
- results/test_b-results_r4.csv +3 -0
- scripts/eval-mgtv-llama3_8b_en.sh +5 -5
- scripts/test-mgtv.sh +9 -3
- scripts/tune-mgtv-llama3_8b_en.sh +3 -1
- scripts/tune-mgtv.sh +1 -1
.gitattributes
CHANGED

@@ -98,3 +98,4 @@ results/mgtv-results_internlm_nv4090.csv filter=lfs diff=lfs merge=lfs -text
 results/glm-4-9b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
 results/llama3-8b_lora_sft_bf16-p1_en.csv filter=lfs diff=lfs merge=lfs -text
 results/mgtv-results_internlm_best.csv filter=lfs diff=lfs merge=lfs -text
+results/test_b-results_r4.csv filter=lfs diff=lfs merge=lfs -text
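
The added .gitattributes line keeps the new results CSV in Git LFS rather than in regular Git history. A minimal sketch of how an entry like this is typically produced, assuming the CSV already exists in the working tree:

    # appends the pattern with "filter=lfs diff=lfs merge=lfs -text" to .gitattributes
    git lfs track "results/test_b-results_r4.csv"
    git add .gitattributes results/test_b-results_r4.csv
    git commit -m "llama3 r4 p2 results"
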
llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml
CHANGED

@@ -1,5 +1,6 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+#model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
 
 ### method
 stage: sft
@@ -14,15 +15,15 @@ upcast_layernorm: true
 ### dataset
 dataset: alpaca_mgtv_p1_en
 template: llama3
-cutoff_len:
+cutoff_len: 8192
 max_samples: 25000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
 ### output
-output_dir: saves/llama3-8b/lora/
+output_dir: saves/llama3-8b/lora/sft_bf16_p1_full_en_r2
 logging_steps: 10
-save_steps:
+save_steps: 35
 plot_loss: true
 # overwrite_output_dir: true
 
@@ -40,7 +41,7 @@ ddp_timeout: 180000000
 val_size: 0.1
 per_device_eval_batch_size: 1
 eval_strategy: steps
-eval_steps:
+eval_steps: 35
 
 report_to: wandb
-run_name:
+run_name: llama3_8b_p1_en_r2 # optional
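
With the run-specific values now filled in (cutoff_len, output_dir, save_steps, eval_steps and the wandb run_name), this config is launched through LLaMA-Factory's CLI. A minimal sketch, assuming LLaMA-Factory is installed and the command is run from the llama-factory/ directory so the relative saves/ path matches:

    cd llama-factory
    llamafactory-cli train config/llama3-8b_lora_sft_bf16-p1_en.yaml
    # LoRA checkpoints are written every 35 steps under saves/llama3-8b/lora/sft_bf16_p1_full_en_r2/
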
llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml
CHANGED

@@ -1,5 +1,6 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+#model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
 
 ### method
 stage: sft
@@ -14,15 +15,15 @@ upcast_layernorm: true
 ### dataset
 dataset: alpaca_mgtv_p2_en
 template: llama3
-cutoff_len:
+cutoff_len: 8192
 max_samples: 25000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
 ### output
-output_dir: saves/llama3-8b/lora/
+output_dir: saves/llama3-8b/lora/sft_bf16_p2_full_en_r2
 logging_steps: 10
-save_steps:
+save_steps: 35
 plot_loss: true
 # overwrite_output_dir: true
 
@@ -40,7 +41,7 @@ ddp_timeout: 180000000
 val_size: 0.1
 per_device_eval_batch_size: 1
 eval_strategy: steps
-eval_steps:
+eval_steps: 35
 
 report_to: wandb
-run_name:
+run_name: llama3_8b_p2_en_r2 # optional
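
The p2 config differs from p1 only in the dataset (alpaca_mgtv_p2_en), output_dir and run_name, so the two fine-tuning runs can be launched back to back. A sketch under the same assumptions as the p1 example above:

    cd llama-factory
    for cfg in config/llama3-8b_lora_sft_bf16-p1_en.yaml \
               config/llama3-8b_lora_sft_bf16-p2_en.yaml; do
        llamafactory-cli train "$cfg"
    done
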
results/test_b-results_r4.csv
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8caa5c7f153782389b90356cd6a59bf1b5ba976d993df896b2d0bfe666d8b64
+size 23128244
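
Only the Git LFS pointer (object id and size, roughly 23 MB) is committed; the CSV content itself lives in LFS storage. A sketch of how to fetch the actual file after cloning, assuming Git LFS is installed:

    git lfs install
    git lfs pull --include="results/test_b-results_r4.csv"
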
scripts/eval-mgtv-llama3_8b_en.sh
CHANGED

@@ -24,17 +24,17 @@ export USING_LLAMA_FACTORY=true
 export USE_ENGLISH_DATASETS=true
 export USE_BF16_FOR_INFERENCE=true
 
-export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
+export MODEL_NAME=meta-llama/Meta-Llama-3.1-8B-Instruct
 export MODEL_PREFIX=llama3-8b_lora_sft_bf16
 
-export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-
-
+export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_en_r2.csv
+export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_en_r2
 export USING_P1_PROMPT_TEMPLATE=true
 echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
 python llm_toolkit/eval_logical_reasoning_all_epochs.py
 
-export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-
-
+export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_en_r2.csv
+export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_en_r2
 export USING_P1_PROMPT_TEMPLATE=false
 echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
 python llm_toolkit/eval_logical_reasoning_all_epochs.py
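
The eval script now targets the r2 English adapters for both prompt templates and writes one results CSV per template. A sketch of the intended invocation and the output locations implied by the variables above:

    bash scripts/eval-mgtv-llama3_8b_en.sh
    # expected outputs (MODEL_PREFIX expanded):
    #   results/llama3-8b_lora_sft_bf16-p1_en_r2.csv
    #   results/llama3-8b_lora_sft_bf16-p2_en_r2.csv
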
scripts/test-mgtv.sh
CHANGED

@@ -13,16 +13,22 @@ cat /etc/os-release
 lscpu
 grep MemTotal /proc/meminfo
 
+pip install transformers==4.41.2
+
 export LOAD_IN_4BIT=false
 export USING_LLAMA_FACTORY=true
 
-export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
+#export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
 # export ADAPTER_NAME_OR_PATH=inflaton-ai/InternLM_2_5-7b_LoRA-Adapter
-export ADAPTER_NAME_OR_PATH=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3/checkpoint-140
+#export ADAPTER_NAME_OR_PATH=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3/checkpoint-140
+
+export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
+export ADAPTER_NAME_OR_PATH=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_r4/checkpoint-140
+
 export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
 
 export TEST_DATA=test_b
+export LOGICAL_REASONING_RESULTS_PATH=results/$TEST_DATA-results_r4.csv
 
 echo "Eval $MODEL_NAME with $ADAPTER_NAME_OR_PATH"
 python llm_toolkit/eval_logical_reasoning.py
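
This test run swaps the base model to shenzhi-wang/Llama3-8B-Chinese-Chat with the r4 p1 LoRA checkpoint, pins transformers to 4.41.2 before inference, and writes predictions to the LFS-tracked results/test_b-results_r4.csv added above. A quick run-and-verify sketch (the version check is an extra verification step, not part of the script):

    bash scripts/test-mgtv.sh
    # predictions land in results/test_b-results_r4.csv
    python -c "import transformers; print(transformers.__version__)"  # confirm the pin took effect (expect 4.41.2)
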
scripts/tune-mgtv-llama3_8b_en.sh
CHANGED

@@ -18,9 +18,11 @@ grep MemTotal /proc/meminfo
 #pip install transformers==4.41.2
 #pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
 
+pip install -U transformers
+
 export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
 
-export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
+export MODEL_NAME=meta-llama/Meta-Llama-3.1-8B-Instruct
 
 export MODEL_PREFIX=llama3-8b_lora_sft_bf16
 
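
Moving the tuning target to meta-llama/Meta-Llama-3.1-8B-Instruct comes with an unpinned pip install -U transformers, since Llama 3.1's rope_scaling configuration is only recognized by recent transformers releases. A more reproducible alternative would be to pin a floor version instead; the exact minimum below is an assumption, not something stated in this commit:

    pip install "transformers>=4.43.0"  # assumed minimum release with Llama 3.1 support
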
scripts/tune-mgtv.sh
CHANGED

@@ -1 +1 @@
-tune-mgtv-
+tune-mgtv-llama3_8b_en.sh
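
tune-mgtv.sh is now a one-line dispatcher to the English Llama 3 tuning script. Because the call is relative, it only resolves when the scripts directory is the working directory or on PATH; a hypothetical, slightly more robust variant (layout assumed from this repo) resolves its own location first:

    #!/bin/sh
    # run the tuning script that sits next to this dispatcher, regardless of the caller's cwd
    BASEDIR=$(dirname "$0")
    bash "$BASEDIR/tune-mgtv-llama3_8b_en.sh"
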