Commit: qwen2.5-1.5b
data/Qwen2.5-0.5B-Instruct_results.csv (ADDED)
The diff for this file is too large to render. See raw diff.
data/Qwen2.5-1.5B-Instruct_results.csv (ADDED)
The diff for this file is too large to render. See raw diff.
notebooks/07_Qwen2.5_models.ipynb (CHANGED)

@@ -19954,13 +19954,337 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 8,
 "metadata": {},
-"outputs": [
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Current Directory:\n",
+"/home/inflaton/code/logical-reasoning\n",
+"Sat Sep 21 23:55:43 2024 \n",
+"+-----------------------------------------------------------------------------------------+\n",
+"| NVIDIA-SMI 560.35.02 Driver Version: 560.94 CUDA Version: 12.6 |\n",
+"|-----------------------------------------+------------------------+----------------------+\n",
+"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+"| | | MIG M. |\n",
+"|=========================================+========================+======================|\n",
+"| 0 NVIDIA GeForce RTX 4090 On | 00000000:01:00.0 Off | Off |\n",
+"| 54% 59C P3 41W / 450W | 471MiB / 24564MiB | 2% Default |\n",
+"| | | N/A |\n",
+"+-----------------------------------------+------------------------+----------------------+\n",
+" \n",
+"+-----------------------------------------------------------------------------------------+\n",
+"| Processes: |\n",
+"| GPU GI CI PID Type Process name GPU Memory |\n",
+"| ID ID Usage |\n",
+"|=========================================================================================|\n",
+"| 0 N/A N/A 25 G /Xwayland N/A |\n",
+"+-----------------------------------------------------------------------------------------+\n",
+"Linux Gen-AI 5.15.133.1-microsoft-standard-WSL2 #1 SMP Thu Oct 5 21:02:42 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux\n",
+"PRETTY_NAME=\"Ubuntu 22.04.2 LTS\"\n",
+"NAME=\"Ubuntu\"\n",
+"VERSION_ID=\"22.04\"\n",
+"VERSION=\"22.04.2 LTS (Jammy Jellyfish)\"\n",
+"VERSION_CODENAME=jammy\n",
+"ID=ubuntu\n",
+"ID_LIKE=debian\n",
+"HOME_URL=\"https://www.ubuntu.com/\"\n",
+"SUPPORT_URL=\"https://help.ubuntu.com/\"\n",
+"BUG_REPORT_URL=\"https://bugs.launchpad.net/ubuntu/\"\n",
+"PRIVACY_POLICY_URL=\"https://www.ubuntu.com/legal/terms-and-policies/privacy-policy\"\n",
+"UBUNTU_CODENAME=jammy\n",
+"Architecture: x86_64\n",
+" CPU op-mode(s): 32-bit, 64-bit\n",
+" Address sizes: 39 bits physical, 48 bits virtual\n",
+" Byte Order: Little Endian\n",
+"CPU(s): 32\n",
+" On-line CPU(s) list: 0-31\n",
+"Vendor ID: GenuineIntel\n",
+" Model name: 13th Gen Intel(R) Core(TM) i9-13900KF\n",
+" CPU family: 6\n",
+" Model: 183\n",
+" Thread(s) per core: 2\n",
+" Core(s) per socket: 16\n",
+" Socket(s): 1\n",
+" Stepping: 1\n",
+" BogoMIPS: 5990.39\n",
+" Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mc\n",
+" a cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscal\n",
+" l nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopo\n",
+" logy tsc_reliable nonstop_tsc cpuid pni pclmulqdq vmx s\n",
+" sse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt tsc_dea\n",
+" dline_timer aes xsave avx f16c rdrand hypervisor lahf_l\n",
+" m abm 3dnowprefetch ssbd ibrs ibpb stibp ibrs_enhanced \n",
+" tpr_shadow vnmi ept vpid ept_ad fsgsbase tsc_adjust bmi\n",
+" 1 avx2 smep bmi2 erms invpcid rdseed adx smap clflushop\n",
+" t clwb sha_ni xsaveopt xsavec xgetbv1 xsaves avx_vnni u\n",
+" mip waitpkg gfni vaes vpclmulqdq rdpid movdiri movdir64\n",
+" b fsrm md_clear serialize flush_l1d arch_capabilities\n",
+"Virtualization features: \n",
+" Virtualization: VT-x\n",
+" Hypervisor vendor: Microsoft\n",
+" Virtualization type: full\n",
+"Caches (sum of all): \n",
+" L1d: 768 KiB (16 instances)\n",
+" L1i: 512 KiB (16 instances)\n",
+" L2: 32 MiB (16 instances)\n",
+" L3: 36 MiB (1 instance)\n",
+"Vulnerabilities: \n",
+" Gather data sampling: Not affected\n",
+" Itlb multihit: Not affected\n",
+" L1tf: Not affected\n",
+" Mds: Not affected\n",
+" Meltdown: Not affected\n",
+" Mmio stale data: Not affected\n",
+" Retbleed: Mitigation; Enhanced IBRS\n",
+" Spec rstack overflow: Not affected\n",
+" Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\n",
+" and seccomp\n",
+" Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer\n",
+" sanitization\n",
+" Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB fillin\n",
+" g, PBRSB-eIBRS SW sequence\n",
+" Srbds: Not affected\n",
+" Tsx async abort: Not affected\n",
+"MemTotal: 49330024 kB\n",
+"Current Directory:\n",
+"/home/inflaton/code/logical-reasoning\n",
+"Evaluating Qwen/Qwen2.5-7B-Instruct with few-shot learning\n",
+"loading env vars from: /home/inflaton/code/logical-reasoning/.env\n",
+"Adding /home/inflaton/code/logical-reasoning to sys.path\n",
+"loading /home/inflaton/code/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n",
+"CUDA is available, we have found 1 GPU(s)\n",
+"NVIDIA GeForce RTX 4090\n",
+"CUDA version: 12.1\n",
+"Qwen/Qwen2.5-7B-Instruct None False datasets/mgtv data/Qwen2.5-7B-Instruct_results.csv 2048 1\n",
+"(0) GPU = NVIDIA GeForce RTX 4090. Max memory = 23.988 GB.\n",
+"0.0 GB of memory reserved.\n",
+"loading model: Qwen/Qwen2.5-7B-Instruct with adapter: None\n",
+"config.json: 100%|█████████████████████████████| 663/663 [00:00<00:00, 10.9MB/s]\n",
+"[INFO|configuration_utils.py:733] 2024-09-21 23:55:48,860 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+"[INFO|configuration_utils.py:800] 2024-09-21 23:55:48,861 >> Model config Qwen2Config {\n",
+" \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+" \"architectures\": [\n",
+" \"Qwen2ForCausalLM\"\n",
+" ],\n",
+" \"attention_dropout\": 0.0,\n",
+" \"bos_token_id\": 151643,\n",
+" \"eos_token_id\": 151645,\n",
+" \"hidden_act\": \"silu\",\n",
+" \"hidden_size\": 3584,\n",
+" \"initializer_range\": 0.02,\n",
+" \"intermediate_size\": 18944,\n",
+" \"max_position_embeddings\": 32768,\n",
+" \"max_window_layers\": 28,\n",
+" \"model_type\": \"qwen2\",\n",
+" \"num_attention_heads\": 28,\n",
+" \"num_hidden_layers\": 28,\n",
+" \"num_key_value_heads\": 4,\n",
+" \"rms_norm_eps\": 1e-06,\n",
+" \"rope_theta\": 1000000.0,\n",
+" \"sliding_window\": null,\n",
+" \"tie_word_embeddings\": false,\n",
+" \"torch_dtype\": \"bfloat16\",\n",
+" \"transformers_version\": \"4.43.3\",\n",
+" \"use_cache\": true,\n",
+" \"use_sliding_window\": false,\n",
+" \"vocab_size\": 152064\n",
+"}\n",
+"\n",
+"tokenizer_config.json: 100%|███████████████| 7.30k/7.30k [00:00<00:00, 53.0MB/s]\n",
+"vocab.json: 100%|██████████████████████████| 2.78M/2.78M [00:01<00:00, 1.76MB/s]\n",
+"merges.txt: 100%|██████████████████████████| 1.67M/1.67M [00:00<00:00, 23.9MB/s]\n",
+"tokenizer.json: 100%|██████████████████████| 7.03M/7.03M [00:00<00:00, 10.8MB/s]\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,949 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file added_tokens.json from cache at None\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file special_tokens_map.json from cache at None\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:53,950 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
+"[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:54,041 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+"[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,157 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+"[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,157 >> Model config Qwen2Config {\n",
+" \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+" \"architectures\": [\n",
+" \"Qwen2ForCausalLM\"\n",
+" ],\n",
+" \"attention_dropout\": 0.0,\n",
+" \"bos_token_id\": 151643,\n",
+" \"eos_token_id\": 151645,\n",
+" \"hidden_act\": \"silu\",\n",
+" \"hidden_size\": 3584,\n",
+" \"initializer_range\": 0.02,\n",
+" \"intermediate_size\": 18944,\n",
+" \"max_position_embeddings\": 32768,\n",
+" \"max_window_layers\": 28,\n",
+" \"model_type\": \"qwen2\",\n",
+" \"num_attention_heads\": 28,\n",
+" \"num_hidden_layers\": 28,\n",
+" \"num_key_value_heads\": 4,\n",
+" \"rms_norm_eps\": 1e-06,\n",
+" \"rope_theta\": 1000000.0,\n",
+" \"sliding_window\": null,\n",
+" \"tie_word_embeddings\": false,\n",
+" \"torch_dtype\": \"bfloat16\",\n",
+" \"transformers_version\": \"4.43.3\",\n",
+" \"use_cache\": true,\n",
+" \"use_sliding_window\": false,\n",
+" \"vocab_size\": 152064\n",
+"}\n",
+"\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/vocab.json\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/merges.txt\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,421 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer.json\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file added_tokens.json from cache at None\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file special_tokens_map.json from cache at None\n",
+"[INFO|tokenization_utils_base.py:2289] 2024-09-21 23:55:55,422 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/tokenizer_config.json\n",
+"[INFO|tokenization_utils_base.py:2533] 2024-09-21 23:55:55,509 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+"09/21/2024 23:55:55 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+"[INFO|configuration_utils.py:733] 2024-09-21 23:55:55,814 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/config.json\n",
+"[INFO|configuration_utils.py:800] 2024-09-21 23:55:55,814 >> Model config Qwen2Config {\n",
+" \"_name_or_path\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
+" \"architectures\": [\n",
+" \"Qwen2ForCausalLM\"\n",
+" ],\n",
+" \"attention_dropout\": 0.0,\n",
+" \"bos_token_id\": 151643,\n",
+" \"eos_token_id\": 151645,\n",
+" \"hidden_act\": \"silu\",\n",
+" \"hidden_size\": 3584,\n",
+" \"initializer_range\": 0.02,\n",
+" \"intermediate_size\": 18944,\n",
+" \"max_position_embeddings\": 32768,\n",
+" \"max_window_layers\": 28,\n",
+" \"model_type\": \"qwen2\",\n",
+" \"num_attention_heads\": 28,\n",
+" \"num_hidden_layers\": 28,\n",
+" \"num_key_value_heads\": 4,\n",
+" \"rms_norm_eps\": 1e-06,\n",
+" \"rope_theta\": 1000000.0,\n",
+" \"sliding_window\": null,\n",
+" \"tie_word_embeddings\": false,\n",
+" \"torch_dtype\": \"bfloat16\",\n",
+" \"transformers_version\": \"4.43.3\",\n",
+" \"use_cache\": true,\n",
+" \"use_sliding_window\": false,\n",
+" \"vocab_size\": 152064\n",
+"}\n",
+"\n",
+"09/21/2024 23:55:55 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+"model.safetensors.index.json: 100%|████████| 27.8k/27.8k [00:00<00:00, 24.5MB/s]\n",
+"[INFO|modeling_utils.py:3634] 2024-09-21 23:55:56,890 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/acbd96531cda22292a3ceaa67e984955d3965282/model.safetensors.index.json\n",
+"Downloading shards: 0%| | 0/4 [00:00<?, ?it/s]\n",
+"model-00001-of-00004.safetensors: 0%| | 0.00/3.95G [00:00<?, ?B/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 0%| | 10.5M/3.95G [00:02<13:57, 4.70MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 1%| | 21.0M/3.95G [00:03<09:51, 6.63MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 1%| | 31.5M/3.95G [00:04<07:42, 8.47MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 1%| | 41.9M/3.95G [00:05<06:45, 9.63MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 1%| | 52.4M/3.95G [00:06<06:45, 9.59MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 2%| | 62.9M/3.95G [00:07<06:38, 9.73MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 2%| | 73.4M/3.95G [00:08<06:20, 10.2MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 2%| | 83.9M/3.95G [00:08<05:58, 10.8MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 2%| | 94.4M/3.95G [00:09<05:52, 10.9MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 3%|█ | 105M/3.95G [00:10<05:43, 11.2MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 3%|█ | 115M/3.95G [00:11<05:32, 11.5MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 3%|█ | 126M/3.95G [00:12<05:49, 10.9MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 3%|█ | 136M/3.95G [00:13<06:04, 10.4MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 4%|█ | 147M/3.95G [00:14<06:14, 10.2MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 4%|█ | 157M/3.95G [00:16<06:31, 9.67MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 4%|█ | 168M/3.95G [00:17<06:42, 9.38MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 5%|█ | 178M/3.95G [00:18<06:45, 9.29MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 5%|█ | 189M/3.95G [00:19<06:18, 9.92MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 5%|█ | 199M/3.95G [00:20<05:50, 10.7MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 5%|█ | 210M/3.95G [00:20<05:32, 11.2MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 6%|█ | 220M/3.95G [00:21<05:19, 11.7MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 6%|█ | 231M/3.95G [00:22<05:19, 11.6MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 6%|█ | 241M/3.95G [00:23<05:12, 11.8MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 6%|█ | 252M/3.95G [00:24<05:07, 12.0MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 7%|█ | 262M/3.95G [00:25<05:24, 11.3MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 7%|█ | 273M/3.95G [00:26<05:18, 11.5MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 7%|█ | 283M/3.95G [00:27<05:42, 10.7MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 7%|█ | 294M/3.95G [00:28<05:39, 10.8MB/s]\u001b[A\n",
+"model-00001-of-00004.safetensors: 8%|█ | 304M/3.95G [00:29<05:33, 10.9MB/s]\u001b[A^C\n",
+"Downloading shards: 0%| | 0/4 [02:41<?, ?it/s]\n",
+"Traceback (most recent call last):\n",
+" File \"/home/inflaton/code/logical-reasoning/llm_toolkit/eval_shots.py\", line 64, in <module>\n",
+" model, tokenizer = load_model(\n",
+" ^^^^^^^^^^^\n",
+" File \"/home/inflaton/code/logical-reasoning/llm_toolkit/llm_utils.py\", line 52, in load_model\n",
+" chat_model = ChatModel(args)\n",
+" ^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/chat_model.py\", line 52, in __init__\n",
+" self.engine: \"BaseEngine\" = HuggingfaceEngine(model_args, data_args, finetuning_args, generating_args)\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/chat/hf_engine.py\", line 59, in __init__\n",
+" self.model = load_model(\n",
+" ^^^^^^^^^^^\n",
+" File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/model/loader.py\", line 162, in load_model\n",
+" model = load_class.from_pretrained(**init_kwargs)\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py\", line 564, in from_pretrained\n",
+" return model_class.from_pretrained(\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/modeling_utils.py\", line 3671, in from_pretrained\n",
+" resolved_archive_file, sharded_metadata = get_checkpoint_shard_files(\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 1079, in get_checkpoint_shard_files\n",
+" cached_filename = cached_file(\n",
+" ^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/utils/hub.py\", line 402, in cached_file\n",
+" resolved_file = hf_hub_download(\n",
+" ^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn\n",
+" return fn(*args, **kwargs)\n",
+" ^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1221, in hf_hub_download\n",
+" return _hf_hub_download_to_cache_dir(\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1367, in _hf_hub_download_to_cache_dir\n",
+" _download_to_tmp_and_move(\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 1884, in _download_to_tmp_and_move\n",
+" http_get(\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/huggingface_hub/file_download.py\", line 539, in http_get\n",
+" for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/requests/models.py\", line 820, in generate\n",
+" yield from self.raw.stream(chunk_size, decode_content=True)\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 1060, in stream\n",
+" data = self.read(amt=amt, decode_content=decode_content)\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 949, in read\n",
+" data = self._raw_read(amt)\n",
+" ^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 873, in _raw_read\n",
+" data = self._fp_read(amt, read1=read1) if not fp_closed else b\"\"\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/urllib3/response.py\", line 856, in _fp_read\n",
+" return self._fp.read(amt) if amt is not None else self._fp.read()\n",
+" ^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/http/client.py\", line 473, in read\n",
+" s = self.fp.read(amt)\n",
+" ^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/socket.py\", line 706, in readinto\n",
+" return self._sock.recv_into(b)\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1314, in recv_into\n",
+" return self.read(nbytes, buffer)\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+" File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/ssl.py\", line 1166, in read\n",
+" return self._sslobj.read(len, buffer)\n",
+" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+"KeyboardInterrupt\n",
+"model-00001-of-00004.safetensors: 8%|█ | 304M/3.95G [00:29<05:57, 10.2MB/s]\n",
+"CPU times: user 901 ms, sys: 326 ms, total: 1.23 s\n",
+"Wall time: 2min 55s\n"
+]
+}
+],
 "source": [
 "%%time\n",
 "\n",
-"!./scripts/eval-mgtv-qwen2.
+"!./scripts/eval-mgtv-qwen2.5_3b.sh"
 ]
 }
 ],
scripts/eval-mgtv-qwen2.5_3b.sh (ADDED)

@@ -0,0 +1,27 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+
+BASEDIR=`pwd`
+
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+
+# $BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
+
+$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
+
+$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct
+
+$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-3B-Instruct
+
+$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
+
+$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
+
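
For reference, the notebook cell in this commit drives the added script through an IPython shell escape ("!./scripts/eval-mgtv-qwen2.5_3b.sh"). A minimal sketch of the equivalent standalone invocation is shown below; the repository path is taken from the "Current Directory" lines in the captured output, and the log filename is an assumption, not part of the commit.

# Hypothetical standalone run; mirrors the notebook cell above.
cd /home/inflaton/code/logical-reasoning            # path as shown in the logs
./scripts/eval-mgtv-qwen2.5_3b.sh 2>&1 | tee eval-mgtv-qwen2.5_3b.log   # log name is an assumption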